From 22673d61a130baa5597966c0ee0b7c03faaebb36 Mon Sep 17 00:00:00 2001 From: Galen Date: Fri, 15 Nov 2024 11:13:26 -0800 Subject: [PATCH 01/14] create a view for a search_layer to return mvt, re #10502 --- arches/app/views/search_layer.py | 179 +++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 arches/app/views/search_layer.py diff --git a/arches/app/views/search_layer.py b/arches/app/views/search_layer.py new file mode 100644 index 0000000000..4c5e04d4ac --- /dev/null +++ b/arches/app/views/search_layer.py @@ -0,0 +1,179 @@ +from django.views import View + +# from django.http import JsonResponse +import json +from django.core.cache import caches +from arches.app.models.system_settings import settings +from django.utils.translation import gettext as _ + +# from arches.app.search.search_engine_factory import SearchEngineFactory +from django.db import connection +from django.http import Http404, HttpResponse + + +class SearchLayer(View): + def get(self, request, zoom, x, y): + # se = SearchEngineFactory().create() + searchid = request.GET.get("searchid", None) + if not searchid: + raise Http404(_("Missing 'searchid' query parameter.")) + EARTHCIRCUM = 40075016.6856 + PIXELSPERTILE = 256 + cache = caches["default"] + resource_ids = cache.get(searchid) + if resource_ids: + resource_ids = json.loads(resource_ids) + else: + print(f"no resourceids found in cache for searchid: {searchid}") + raise Http404(_("Missing resourceids from search cache.")) + + search_geom_count = 0 + cache_key = create_searchlayer_mvt_cache_key(searchid, zoom, x, y, request.user) + tile = cache.get(cache_key) + if tile is None: + with connection.cursor() as cursor: + if len(resource_ids) == 0: + resource_ids.append( + "10000000-0000-0000-0000-000000000001" + ) # This must have a uuid that will never be a resource id. 
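                # Without the placeholder above, an empty tuple would render as
                # "IN ()", which is a SQL syntax error; a UUID that can never be
                # assigned keeps the query valid while matching zero rows.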
+ resource_ids = tuple(resource_ids) + + if int(zoom) < 14: + arc = EARTHCIRCUM / ((1 << int(zoom)) * PIXELSPERTILE) + distance = arc * float(1000) + min_points = 3 + distance = ( + settings.CLUSTER_DISTANCE_MAX + if distance > settings.CLUSTER_DISTANCE_MAX + else distance + ) + + count_query = """ + SELECT count(*) FROM geojson_geometries + WHERE + ST_Intersects(geom, TileBBox(%s, %s, %s, 3857)) + AND + resourceinstanceid in %s + """ + + # get the count of matching geometries + cursor.execute( + count_query, + [ + zoom, + x, + y, + resource_ids, + ], + ) + search_geom_count = cursor.fetchone()[0] + + if search_geom_count >= min_points: + cursor.execute( + """WITH clusters(tileid, resourceinstanceid, nodeid, geom, cid) + AS ( + SELECT m.*, + ST_ClusterDBSCAN(geom, eps := %s, minpoints := %s) over () AS cid + FROM ( + SELECT tileid, + resourceinstanceid, + nodeid, + geom + FROM geojson_geometries + WHERE + ST_Intersects(geom, TileBBox(%s, %s, %s, 3857)) + AND + resourceinstanceid in %s + ) m + ) + SELECT ST_AsMVT( + tile, + 'search_layer', + 4096, + 'geom', + 'id' + ) FROM ( + SELECT resourceinstanceid::text, + row_number() over () as id, + 1 as point_count, + ST_AsMVTGeom( + geom, + TileBBox(%s, %s, %s, 3857) + ) AS geom, + '' AS extent + FROM clusters + WHERE cid is NULL + UNION + SELECT NULL as resourceinstanceid, + row_number() over () as id, + count(*) as point_count, + ST_AsMVTGeom( + ST_Centroid( + ST_Collect(geom) + ), + TileBBox(%s, %s, %s, 3857) + ) AS geom, + ST_AsGeoJSON( + ST_Extent(geom) + ) AS extent + FROM clusters + WHERE cid IS NOT NULL + GROUP BY cid + ) as tile;""", + [ + distance, + min_points, + zoom, + x, + y, + resource_ids, + zoom, + x, + y, + zoom, + x, + y, + ], + ) + elif search_geom_count: + cursor.execute( + """SELECT ST_AsMVT(tile, 'search_layer', 4096, 'geom', 'id') FROM (SELECT tileid, + id, + resourceinstanceid, + nodeid, + featureid::text AS featureid, + ST_AsMVTGeom( + geom, + TileBBox(%s, %s, %s, 3857) + ) AS geom, + 1 AS point_count + FROM geojson_geometries + WHERE resourceinstanceid in %s and (geom && ST_TileEnvelope(%s, %s, %s))) AS tile;""", + [zoom, x, y, resource_ids, zoom, x, y], + ) + else: + tile = "" + + cursor.execute( + """SELECT ST_AsMVT(tile, 'search_layer', 4096, 'geom', 'id') FROM (SELECT tileid, + id, + resourceinstanceid, + nodeid, + featureid::text AS featureid, + ST_AsMVTGeom( + geom, + TileBBox(%s, %s, %s, 3857) + ) AS geom, + 1 AS point_count + FROM geojson_geometries + WHERE resourceinstanceid in %s and (geom && ST_TileEnvelope(%s, %s, %s))) AS tile;""", + [zoom, x, y, resource_ids, zoom, x, y], + ) + tile = bytes(cursor.fetchone()[0]) if tile is None else tile + cache.set(cache_key, tile, settings.TILE_CACHE_TIMEOUT) + + return HttpResponse(tile, content_type="application/x-protobuf") + + +def create_searchlayer_mvt_cache_key(searchid_hash, zoom, x, y, user): + return f"searchlayer_mvt_{searchid_hash}_{zoom}_{x}_{y}_{user}" From 5e07bb396cb88ab537a387915d76eaaa61cbeebf Mon Sep 17 00:00:00 2001 From: Galen Date: Fri, 15 Nov 2024 11:14:00 -0800 Subject: [PATCH 02/14] create a url for search_layer, re #10502 --- arches/urls.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arches/urls.py b/arches/urls.py index e2772c1999..29fb817f88 100644 --- a/arches/urls.py +++ b/arches/urls.py @@ -56,6 +56,7 @@ ResourceActivityStreamPageView, ResourceActivityStreamCollectionView, ) +from arches.app.views.search_layer import SearchLayer from arches.app.views.plugin import PluginView from arches.app.views.workflow_history import 
WorkflowHistoryView
from arches.app.views.concept import RDMView
@@ -675,6 +676,11 @@
         api.MVT.as_view(),
         name="mvt",
     ),
+    path(
+        "search-layer/<int:zoom>/<int:x>/<int:y>.pbf",
+        SearchLayer.as_view(),
+        name="search_layer",
+    ),
     re_path(r"^images$", api.Images.as_view(), name="images"),
     re_path(
         r"^ontology_properties$",

From 1a70a818f2ec535fba66f00c687958f05cfd7477 Mon Sep 17 00:00:00 2001
From: Galen
Date: Fri, 15 Nov 2024 11:16:18 -0800
Subject: [PATCH 03/14] rm updateSearchResultsLayer calls in subscriptions, re
 #10502

---
 arches/app/media/js/views/components/search/map-filter.js | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arches/app/media/js/views/components/search/map-filter.js b/arches/app/media/js/views/components/search/map-filter.js
index 2f00cce545..122bbdea62 100644
--- a/arches/app/media/js/views/components/search/map-filter.js
+++ b/arches/app/media/js/views/components/search/map-filter.js
@@ -369,14 +369,6 @@ define([
                     this.updateFilter();
                 }, this);

-                this.searchAggregations.subscribe(this.updateSearchResultsLayers, this);
-                if (ko.isObservable(bins)) {
-                    bins.subscribe(this.updateSearchResultsLayers, this);
-                }
-                if (this.searchAggregations()) {
-                    this.updateSearchResultsLayers();
-                }
-
                 this.mouseoverInstanceId.subscribe(updateSearchResultPointLayer);
             }, this);
         },

From 5f6c426f0568e3617dacbef6bdd97bdf264bc095 Mon Sep 17 00:00:00 2001
From: Galen
Date: Fri, 15 Nov 2024 11:17:12 -0800
Subject: [PATCH 04/14] pass searchQueryId to MapViewModel, re #10502

---
 arches/app/media/js/views/components/search/map-filter.js | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arches/app/media/js/views/components/search/map-filter.js b/arches/app/media/js/views/components/search/map-filter.js
index 122bbdea62..d3ca1e2001 100644
--- a/arches/app/media/js/views/components/search/map-filter.js
+++ b/arches/app/media/js/views/components/search/map-filter.js
@@ -40,6 +40,7 @@ define([
             options.name = "Map Filter";
             BaseFilter.prototype.initialize.call(this, options);

+            options.searchQueryId = this.searchQueryId;
             options.sources = {
                 "geojson-search-buffer-data": {
                     "type": "geojson",

From 5ea684a1ce01ae23b948f4e37ab18cdb6b14074f Mon Sep 17 00:00:00 2001
From: Galen
Date: Fri, 15 Nov 2024 11:19:48 -0800
Subject: [PATCH 05/14] stub out initial logic for adding, removing search
 layers in map vm, re #10502

---
 arches/app/media/js/viewmodels/map.js | 255 ++++++++++++++++++++++++++
 1 file changed, 255 insertions(+)

diff --git a/arches/app/media/js/viewmodels/map.js b/arches/app/media/js/viewmodels/map.js
index eac6659c0a..e6678ecdae 100644
--- a/arches/app/media/js/viewmodels/map.js
+++ b/arches/app/media/js/viewmodels/map.js
@@ -12,6 +12,261 @@ define([
     const viewModel = function(params) {
         var self = this;

+        const searchLayerIds = [
+            'searchtiles-unclustered-polygon-fill',
+            'searchtiles-unclustered-point',
+            'searchtiles-clusters',
+            'searchtiles-clusters-halo',
+            'searchtiles-cluster-count',
+            'searchtiles-unclustered-polypoint'
+        ];
+        const searchLayerDefinitions = [
+            {
+                "id": "searchtiles-unclustered-polygon-fill",
+                "type": "fill",
+                "paint": {
+                    "fill-color": "#fa6003",
+                    "fill-opacity": 0.3,
+                    "fill-outline-color": "#fa6003"
+                },
+                "filter": [
+                    "==",
+                    "$type",
+                    "Polygon"
+                ],
+                "source": "search-layer-source",
+                "source-layer": "search_layer",
+                "minzoom": 10,
+                "tolerance": 0.75
+            },
+            {
+                "id": "searchtiles-unclustered-point",
+                "type": "circle",
+                "paint": {
+                    "circle-color": "#fa6003",
+                    "circle-radius": 6,
+                    "circle-opacity": 1
+                },
+                "filter": [
+                    "!",
+                    [
+                        "has",
+                        "point_count"
+                    ]
+                ],
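+                // no point_count property means a single, unclustered feature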
"source": "search-layer-source", + "source-layer": "search_layer" + }, + { + "id": "searchtiles-clusters", + "type": "circle", + "paint": { + "circle-color": "#fa6003", + "circle-radius": [ + "step", + [ + "get", + "point_count" + ], + 10, + 100, + 20, + 750, + 30, + 1500, + 40, + 2500, + 50, + 5000, + 65 + ], + "circle-opacity": [ + "case", + [ + "boolean", + [ + "has", + "point_count" + ], + true + ], + 1, + 0 + ] + }, + "filter": [ + "all", + [ + "==", + "$type", + "Point" + ], + [ + "!=", + "highlight", + true + ] + ], + "source": "search-layer-source", + "source-layer": "search_layer" + }, + { + "id": "searchtiles-clusters-halo", + "type": "circle", + "paint": { + "circle-color": "#fa6003", + "circle-radius": [ + "step", + [ + "get", + "point_count" + ], + 20, + 100, + 30, + 750, + 40, + 1500, + 50, + 2500, + 60, + 5000, + 75 + ], + "circle-opacity": [ + "case", + [ + "boolean", + [ + "has", + "point_count" + ], + true + ], + 0.5, + 0 + ] + }, + "filter": [ + "all", + [ + "==", + "$type", + "Point" + ], + [ + "!=", + "highlight", + true + ] + ], + "maxzoom": 14, + "source": "search-layer-source", + "source-layer": "search_layer" + }, + { + "id": "searchtiles-cluster-count", + "type": "symbol", + "paint": { + "text-color": "#fff" + }, + "filter": [ + "has", + "point_count" + ], + "layout": { + "text-font": [ + "DIN Offc Pro Medium", + "Arial Unicode MS Bold" + ], + "text-size": 14, + "text-field": "{point_count}" + }, + "maxzoom": 14, + "source": "search-layer-source", + "source-layer": "search_layer" + }, + { + "id": "searchtiles-unclustered-polypoint", + "type": "circle", + "paint": { + "circle-color": "#fa6003", + "circle-radius": 0, + "circle-opacity": 0, + "circle-stroke-color": "#fff", + "circle-stroke-width": 0 + }, + "filter": [ + "!", + [ + "has", + "point_count" + ] + ], + "layout": { + "visibility": "none" + }, + "source": "search-layer-source", + "source-layer": "search_layer" + } + ]; + this.searchQueryId = params.searchQueryId; + this.searchQueryId.subscribe(function (searchId) { + if (searchId) { + self.addSearchLayer(searchId); + } else { + // optionally, remove the search layer if searchId becomes undefined + self.removeSearchLayer(); + } + }); + + this.addSearchLayer = function (searchId) { + console.log(searchId); + if (!self.map()) + return; + const tileUrlTemplate = `http://localhost:8000/search-layer/{z}/{x}/{y}.pbf?searchid=${encodeURIComponent(searchId)}`; + + // Remove existing source and layer if they exist + searchLayerIds.forEach(layerId => { + if (self.map().getLayer(layerId)) { + self.map().removeLayer(layerId); + } + if (self.map().getSource(layerId)) { + self.map().removeSource(layerId); + } + }); + if (self.map().getSource('search-layer-source')) { + self.map().removeSource('search-layer-source'); + } + + // Add the vector tile source + self.map().addSource('search-layer-source', { + type: 'vector', + tiles: [tileUrlTemplate], + minzoom: 0, + maxzoom: 22, + }); + + // Add the layer to display the data + searchLayerDefinitions.forEach(mapLayer => { + self.map().addLayer(mapLayer); + }); + + // Optionally, fit the map to the data bounds + // self.fitMapToDataBounds(searchId); + }; + + this.removeSearchLayer = function () { + searchLayerDefinitions.forEach(mapLayer => { + if (self.map().getLayer(mapLayer.id)) { + self.map().removeLayer(mapLayer.id); + } + }); + if (self.map().getSource('search-layer-source')) { + self.map().removeSource('search-layer-source'); + } + }; + var geojsonSourceFactory = function() { return { From 
920a3fb34d951218684bffcbe1159500c9e70435 Mon Sep 17 00:00:00 2001 From: Galen Date: Fri, 15 Nov 2024 11:21:02 -0800 Subject: [PATCH 06/14] add hook for addSearchLayer in map subscription, re #10502 --- arches/app/media/js/viewmodels/map.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arches/app/media/js/viewmodels/map.js b/arches/app/media/js/viewmodels/map.js index e6678ecdae..d3f6dbf953 100644 --- a/arches/app/media/js/viewmodels/map.js +++ b/arches/app/media/js/viewmodels/map.js @@ -317,6 +317,10 @@ define([ map.fitBounds(ko.unwrap(params.bounds), boundingOptions); } + // If searchQueryId is already available, add the search layer + if (self.searchQueryId()) { + self.addSearchLayer(self.searchQueryId()); + } }); this.bounds = ko.observable(ko.unwrap(params.bounds) || arches.hexBinBounds); From 0bdb2d4c0b7d375875a32e3a3abb83b6380516d2 Mon Sep 17 00:00:00 2001 From: Galen Date: Fri, 15 Nov 2024 11:32:51 -0800 Subject: [PATCH 07/14] define searchQueryId, override doQuery in standard-search-view, re #10502 --- .../components/search/standard-search-view.js | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/arches/app/media/js/views/components/search/standard-search-view.js b/arches/app/media/js/views/components/search/standard-search-view.js index e311bcf438..7134caf193 100644 --- a/arches/app/media/js/views/components/search/standard-search-view.js +++ b/arches/app/media/js/views/components/search/standard-search-view.js @@ -15,6 +15,8 @@ define([ this.selectedPopup = ko.observable(''); this.sharedStateObject.selectedPopup = this.selectedPopup; + this.searchQueryId = ko.observable(null); + this.sharedStateObject.searchQueryId = this.searchQueryId; var firstEnabledFilter = _.find(this.sharedStateObject.searchFilterConfigs, function(filter) { return filter.config.layoutType === 'tabbed'; }, this); @@ -51,6 +53,47 @@ define([ this.searchFilterVms[componentName](this); }, + doQuery: function() { + const queryObj = JSON.parse(this.queryString()); + if (self.updateRequest) { self.updateRequest.abort(); } + self.updateRequest = $.ajax({ + type: "GET", + url: arches.urls.search_results, + data: queryObj, + context: this, + success: function(response) { + _.each(this.sharedStateObject.searchResults, function(value, key, results) { + if (key !== 'timestamp') { + delete this.sharedStateObject.searchResults[key]; + } + }, this); + _.each(response, function(value, key, response) { + if (key !== 'timestamp') { + this.sharedStateObject.searchResults[key] = value; + } + }, this); + this.sharedStateObject.searchResults.timestamp(response.timestamp); + this.searchQueryId(this.sharedStateObject.searchResults.searchqueryid); + this.sharedStateObject.userIsReviewer(response.reviewer); + this.sharedStateObject.userid(response.userid); + this.sharedStateObject.total(response.total_results); + this.sharedStateObject.hits(response.results.hits.hits.length); + this.sharedStateObject.alert(false); + }, + error: function(response, status, error) { + const alert = new AlertViewModel('ep-alert-red', arches.translations.requestFailed.title, response.responseJSON?.message); + if(self.updateRequest.statusText !== 'abort'){ + this.alert(alert); + } + this.sharedStateObject.loading(false); + }, + complete: function(request, status) { + self.updateRequest = undefined; + window.history.pushState({}, '', '?' 
+ $.param(queryObj).split('+').join('%20')); + this.sharedStateObject.loading(false); + } + }); + }, }); return ko.components.register(componentName, { From 3d1349a8886bbbafe96ebfa319ae0f0c48a25dea Mon Sep 17 00:00:00 2001 From: Galen Date: Fri, 15 Nov 2024 12:59:18 -0800 Subject: [PATCH 08/14] hold - draft pit implementation in search, re #10502 --- .../search/components/standard_search_view.py | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/arches/app/search/components/standard_search_view.py b/arches/app/search/components/standard_search_view.py index 6af783b1af..59ac59f5fb 100644 --- a/arches/app/search/components/standard_search_view.py +++ b/arches/app/search/components/standard_search_view.py @@ -19,6 +19,7 @@ from django.utils.translation import gettext as _ from datetime import datetime import logging +import json details = { @@ -136,6 +137,86 @@ def append_dsl(self, search_query_object, **kwargs): if load_tiles: search_query_object["query"].include("tiles") + def set_search_pit(self, search_query_object, se, cache, **kwargs): + query_obj = kwargs.get("search_request_object", self.request.GET) + resourceids_only_query_hash_key = create_searchresults_cache_key( + self.request, query_obj, resourceids_only=True + ) + pit_response = se.es.open_point_in_time( + index=RESOURCES_INDEX, keep_alive="2m" # Adjust as needed + ) + pit_id = pit_response["pit_id"] + + # Perform the search + search_params = { + # Your search query parameters + } + + search_response = search_query_object["query"].search( + index=RESOURCES_INDEX, + body=search_params, + pit={"id": pit_id, "keep_alive": "2m"}, + size=1000, # Adjust as needed + ) + # TODO: how can I cache the search query itself? The QueryObject is really hard to serialize + # could just re-instantiate the filters from the search_layer to regenerate the QueryObject from scratch + + # Cache the pit_id and search parameters + cache.set( + resourceids_only_query_hash_key, + json.dumps({"pit_id": pit_id, "search_params": search_params}), + timeout=120, + ) + return resourceids_only_query_hash_key + + def execute_resourceids_only_query( + self, search_query_object, response_object, cache, **kwargs + ): + # cached_response_json = cache.get(cache_key) + query_obj = kwargs.get("search_request_object", self.request.GET) + resourceids_only_query_hash_key = create_searchresults_cache_key( + self.request, query_obj, resourceids_only=True + ) + # did we already cache result resourceids for this query under this query hash? 
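+        # the search_layer view later reads this cache entry by the same hash
+        # key (returned to the client as the searchid), keeping map tiles in
+        # sync with this exact result set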
+ cached_result_resourceids = cache.get(resourceids_only_query_hash_key) + if ( + cached_result_resourceids + ): # we already did the work here; we'll return the hash key + return resourceids_only_query_hash_key + else: + print( + f"no cached resourceids for hashkey {resourceids_only_query_hash_key}" + ) + + if resourceinstanceid is None: + results = search_query_object["query"].search( + index=RESOURCES_INDEX, limit=10000, scroll="1m" + ) + scroll_id = results["_scroll_id"] + scroll_size = results["hits"]["total"]["value"] + total_results = results["hits"]["total"]["value"] + if query_obj.get("paging-filter", None) is None: + while scroll_size > 0: + page = search_query_object["query"].se.es.scroll( + scroll_id=scroll_id, scroll="3m" + ) + scroll_size = len(page["hits"]["hits"]) + results["hits"]["hits"] += page["hits"]["hits"] + else: + results = search_query_object["query"].search( + index=RESOURCES_INDEX, id=resourceinstanceid + ) + total_results = 1 + + if results is not None: + all_resourceids = [hit["_id"] for hit in results["hits"]["hits"]] + cache.set( + resourceids_only_query_hash_key, + json.dumps(all_resourceids), + settings.SEARCH_RESULTS_CACHE_TIMEOUT, + ) + return resourceids_only_query_hash_key + def execute_query(self, search_query_object, response_object, **kwargs): for_export = get_str_kwarg_as_bool("export", self.request.GET) pages = self.request.GET.get("pages", None) @@ -232,6 +313,53 @@ def handle_search_results_query( if returnDsl: return response_object, search_query_object + # at this point we want to FIRST do an unlimited query to get all resourceids + # of the results + # THEN SECOND we want to do a second query to get a rich set of results only for the page + unpaged_query = None + search_query_object["query"].include("tiles") + for_export = get_str_kwarg_as_bool("export", sorted_query_obj) + if not for_export: + resourceids_only_query_hash_key = self.execute_resourceids_only_query( + search_query_object, + response_object, + cache, + search_request_object=sorted_query_obj, + resourceinstanceid=resourceinstanceid, + ) + + # now I know the resourceids have been cached under the resourceids_only_query_hash_key + # I should set a start/end limit for the second query + paging_filter = search_filter_factory.get_filter("paging-filter") + if paging_filter: + paging_filter.append_dsl( + search_query_object, + permitted_nodegroups=permitted_nodegroups, + include_provisional=include_provisional, + load_tiles=load_tiles, + for_export=for_export, + querystring=sorted_query_obj.get("paging-filter", "{}"), + search_request_object=sorted_query_obj, + ) + + search_query_object["query"].include("graph_id") + # if geom_only or for_export or map_manager or load_tiles: + search_query_object["query"].include("geometries") + search_query_object["query"].include("points") + # if not geom_only: + for prop in essential_result_properties: + search_query_object["query"].include(prop) + # if load_tiles: + # search_query_object["query"].include("tiles") + search_query_object["query"].include("resourceinstanceid") + + self.execute_paged_query( + search_query_object, + response_object, + search_request_object=sorted_query_obj, + resourceinstanceid=resourceinstanceid, + ) + for filter_type, querystring in list(sorted_query_obj.items()): search_filter = search_filter_factory.get_filter(filter_type) if search_filter: From 675fb32b2fb879ad701c3fe59f4f8c147e21c271 Mon Sep 17 00:00:00 2001 From: Galen Date: Sun, 17 Nov 2024 17:40:47 -0800 Subject: [PATCH 09/14] first iteration using geotilegrid in 
search_layer, re #10502 --- arches/app/views/search_layer.py | 380 ++++++++++++++++++------------- 1 file changed, 225 insertions(+), 155 deletions(-) diff --git a/arches/app/views/search_layer.py b/arches/app/views/search_layer.py index 4c5e04d4ac..2db577a43e 100644 --- a/arches/app/views/search_layer.py +++ b/arches/app/views/search_layer.py @@ -1,178 +1,248 @@ +import math from django.views import View -# from django.http import JsonResponse -import json from django.core.cache import caches from arches.app.models.system_settings import settings from django.utils.translation import gettext as _ -# from arches.app.search.search_engine_factory import SearchEngineFactory -from django.db import connection +from arches.app.search.search_engine_factory import SearchEngineFactory +from arches.app.search.elasticsearch_dsl_builder import ( + Query, + Bool, + GeoShape, + Nested, + GeoTileGridAgg, + NestedAgg, + Aggregation, +) + +# from django.db import connection from django.http import Http404, HttpResponse +from arches.app.utils.betterJSONSerializer import JSONDeserializer +from pprint import pprint + +# from django.contrib.gis.geos import Polygon +from datetime import datetime, timedelta +from time import time +import mercantile +import mapbox_vector_tile + +ZOOM_THRESHOLD = 14 +EXTENT = 4096 class SearchLayer(View): def get(self, request, zoom, x, y): - # se = SearchEngineFactory().create() + start = time() + print(f"ZOOM: {zoom}") searchid = request.GET.get("searchid", None) if not searchid: + print("NO SEARCHID FOUND ON REQUEST") raise Http404(_("Missing 'searchid' query parameter.")) + EARTHCIRCUM = 40075016.6856 PIXELSPERTILE = 256 cache = caches["default"] - resource_ids = cache.get(searchid) - if resource_ids: - resource_ids = json.loads(resource_ids) - else: + pit_id = cache.get(searchid + "_pit") + query_dsl = cache.get(searchid + "_dsl") + # pprint(query_dsl) + # {"pit_id": pit_id, "dsl": query.dsl} + if pit_id is None or query_dsl is None: print(f"no resourceids found in cache for searchid: {searchid}") raise Http404(_("Missing resourceids from search cache.")) - search_geom_count = 0 - cache_key = create_searchlayer_mvt_cache_key(searchid, zoom, x, y, request.user) - tile = cache.get(cache_key) - if tile is None: - with connection.cursor() as cursor: - if len(resource_ids) == 0: - resource_ids.append( - "10000000-0000-0000-0000-000000000001" - ) # This must have a uuid that will never be a resource id. 
- resource_ids = tuple(resource_ids) - - if int(zoom) < 14: - arc = EARTHCIRCUM / ((1 << int(zoom)) * PIXELSPERTILE) - distance = arc * float(1000) - min_points = 3 - distance = ( - settings.CLUSTER_DISTANCE_MAX - if distance > settings.CLUSTER_DISTANCE_MAX - else distance - ) - - count_query = """ - SELECT count(*) FROM geojson_geometries - WHERE - ST_Intersects(geom, TileBBox(%s, %s, %s, 3857)) - AND - resourceinstanceid in %s - """ - - # get the count of matching geometries - cursor.execute( - count_query, - [ - zoom, - x, - y, - resource_ids, - ], - ) - search_geom_count = cursor.fetchone()[0] - - if search_geom_count >= min_points: - cursor.execute( - """WITH clusters(tileid, resourceinstanceid, nodeid, geom, cid) - AS ( - SELECT m.*, - ST_ClusterDBSCAN(geom, eps := %s, minpoints := %s) over () AS cid - FROM ( - SELECT tileid, - resourceinstanceid, - nodeid, - geom - FROM geojson_geometries - WHERE - ST_Intersects(geom, TileBBox(%s, %s, %s, 3857)) - AND - resourceinstanceid in %s - ) m - ) - SELECT ST_AsMVT( - tile, - 'search_layer', - 4096, - 'geom', - 'id' - ) FROM ( - SELECT resourceinstanceid::text, - row_number() over () as id, - 1 as point_count, - ST_AsMVTGeom( - geom, - TileBBox(%s, %s, %s, 3857) - ) AS geom, - '' AS extent - FROM clusters - WHERE cid is NULL - UNION - SELECT NULL as resourceinstanceid, - row_number() over () as id, - count(*) as point_count, - ST_AsMVTGeom( - ST_Centroid( - ST_Collect(geom) - ), - TileBBox(%s, %s, %s, 3857) - ) AS geom, - ST_AsGeoJSON( - ST_Extent(geom) - ) AS extent - FROM clusters - WHERE cid IS NOT NULL - GROUP BY cid - ) as tile;""", - [ - distance, - min_points, - zoom, - x, - y, - resource_ids, - zoom, - x, - y, - zoom, - x, - y, - ], - ) - elif search_geom_count: - cursor.execute( - """SELECT ST_AsMVT(tile, 'search_layer', 4096, 'geom', 'id') FROM (SELECT tileid, - id, - resourceinstanceid, - nodeid, - featureid::text AS featureid, - ST_AsMVTGeom( - geom, - TileBBox(%s, %s, %s, 3857) - ) AS geom, - 1 AS point_count - FROM geojson_geometries - WHERE resourceinstanceid in %s and (geom && ST_TileEnvelope(%s, %s, %s))) AS tile;""", - [zoom, x, y, resource_ids, zoom, x, y], - ) - else: - tile = "" - - cursor.execute( - """SELECT ST_AsMVT(tile, 'search_layer', 4096, 'geom', 'id') FROM (SELECT tileid, - id, - resourceinstanceid, - nodeid, - featureid::text AS featureid, - ST_AsMVTGeom( - geom, - TileBBox(%s, %s, %s, 3857) - ) AS geom, - 1 AS point_count - FROM geojson_geometries - WHERE resourceinstanceid in %s and (geom && ST_TileEnvelope(%s, %s, %s))) AS tile;""", - [zoom, x, y, resource_ids, zoom, x, y], - ) - tile = bytes(cursor.fetchone()[0]) if tile is None else tile - cache.set(cache_key, tile, settings.TILE_CACHE_TIMEOUT) - - return HttpResponse(tile, content_type="application/x-protobuf") + se = SearchEngineFactory().create() + query_dsl = JSONDeserializer().deserialize(query_dsl, indent=4) + new_query = Query(se, limit=0) + new_query.prepare() + new_query.dsl = query_dsl + # spatial_query = Bool() + # if int(y) == 203: + # print("\n\n\nwhats my new query\n\n\n") + # pprint(new_query.__str__()) + tile_x = int(x) + tile_y = int(y) + tile_z = int(zoom) + tile_bounds = mercantile.bounds(tile_x, tile_y, tile_z) + bbox = ( + tile_bounds.west, + tile_bounds.south, + tile_bounds.east, + tile_bounds.north, + ) + geo_bbox_query = { + "geo_bounding_box": { + "points.point": { + "top_left": {"lat": tile_bounds.north, "lon": tile_bounds.west}, + "bottom_right": {"lat": tile_bounds.south, "lon": tile_bounds.east}, + } + } + } + + if int(zoom) < 
ZOOM_THRESHOLD: + + geotile_agg = GeoTileGridAgg( + precision=int(zoom), field="points.point", size=10000 + ) + centroid_agg = Aggregation( + type="geo_centroid", name="centroid", field="points.point" + ) + geotile_agg.add_aggregation(centroid_agg) + nested_agg = NestedAgg(path="points", name="geo_aggs") + nested_agg.add_aggregation(geotile_agg) + + # Build the filter aggregation + geo_filter_agg = Aggregation( + type="filter", + name="geo_filter", + filter=Nested(path="points", query=geo_bbox_query).dsl, + ) + + # Add the geotile_grid aggregation under the filter aggregation + geo_filter_agg.add_aggregation(geotile_agg) + + # Update the nested aggregation + nested_agg = NestedAgg(path="points", name="geo_aggs") + nested_agg.add_aggregation(geo_filter_agg) + new_query.add_aggregation(nested_agg) + + # pit doesn't allow scroll context or index + new_query.dsl["source_includes"] = [] + new_query.dsl["size"] = 0 + # if int(y) == 203: + # pprint(new_query.dsl) + results = se.es.search( + pit={"id": pit_id, "keep_alive": "2m"}, _source=False, **new_query.dsl + ) + elapsed = time() - start + # print( + # "_______Time to finish search_layer search 1 (total: {0}) = {1}".format(results["hits"]["total"]["value"], timedelta(seconds=elapsed)) + # ) + # print("search done") + # print(results["hits"]["total"]) + # pprint(results) + features = [] + buckets = results["aggregations"]["geo_aggs"]["geo_filter"]["zoomed_grid"][ + "buckets" + ] + # print(f"Number of buckets: {len(buckets)}") + + for bucket in buckets: + centroid = bucket["centroid"]["location"] + lon = centroid["lon"] + lat = centroid["lat"] + doc_count = bucket["doc_count"] + # px, py = lnglat_to_tile_px(lon, lat, tile_x, tile_y, tile_z, EXTENT) + + feature = { + "geometry": {"type": "Point", "coordinates": [lon, lat]}, + "properties": {"count": doc_count}, + } + + features.append(feature) + + layers = [ + { + "name": "clusters", # Layer name + "features": features, + "version": 2, + "extent": EXTENT, + } + ] + else: + # Fetch individual features + # Add the spatial filter to the query + points_spatial_query = Nested(path="points", query=geo_bbox_query) + # new_query.add_query(spatial_query) + + geometries_spatial_query = Nested(path="geometries", query=geo_bbox_query) + spatial_bool_query = Bool() + spatial_bool_query.should(points_spatial_query) + spatial_bool_query.should(geometries_spatial_query) + new_query.add_query(spatial_bool_query) + + new_query.dsl["size"] = 10000 + + new_query.include("points.point") + new_query.include("geometries.geom") + # new_query.include("resourceinstanceid") + # Add other fields if needed + + # Execute the search + results = se.es.search( + pit={"id": pit_id, "keep_alive": "2m"}, **new_query.dsl + ) + + # Process the hits to generate features + features = [] + point_features = [] + geometry_features = [] + + for hit in results["hits"]["hits"]: + source = hit["_source"] + resource_id = hit.get("_id") + + # Handle points + points = source.get("points", []) + for point in points: + point_geom = point.get("point") + if point_geom: + lon = point_geom.get("lon") + lat = point_geom.get("lat") + if lon and lat: + feature = { + "geometry": { + "type": "Point", + "coordinates": [lon, lat], + }, + "properties": { + "resourceinstanceid": resource_id, + "count": 1, + }, + } + point_features.append(feature) + geometries = source.get("geometries", []) + for geometry in geometries: + geom = geometry.get("geom") + if geom: + geom_type = geom.get("type") + coordinates = geom.get("coordinates") + if coordinates: + feature = { 
+ "geometry": { + "type": geom_type, + "coordinates": coordinates, + }, + "properties": {"resourceinstanceid": resource_id}, + } + pprint(feature) + geometry_features.append(feature) + + # Build layers + layers = [] + + if point_features: + point_layer = { + "name": "points", + "features": point_features, + "version": 2, + "extent": EXTENT, + } + layers.append(point_layer) + + if geometry_features: + geometry_layer = { + "name": "geometries", + "features": geometry_features, + "version": 2, + "extent": EXTENT, + } + layers.append(geometry_layer) + + tile = mapbox_vector_tile.encode( + layers, quantize_bounds=bbox, y_coord_down=True, extents=EXTENT + ) + return HttpResponse(tile, content_type="application/vnd.mapbox-vector-tile") def create_searchlayer_mvt_cache_key(searchid_hash, zoom, x, y, user): From 20d2c488f4f28aca91108f2d41f7b5252ba288fd Mon Sep 17 00:00:00 2001 From: Galen Date: Sun, 17 Nov 2024 20:28:25 -0800 Subject: [PATCH 10/14] commit es dsl upgrades for compat --- .../app/search/elasticsearch_dsl_builder.py | 50 +++++++++++++------ 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/arches/app/search/elasticsearch_dsl_builder.py b/arches/app/search/elasticsearch_dsl_builder.py index 9338e6a4a5..156a49ebe7 100644 --- a/arches/app/search/elasticsearch_dsl_builder.py +++ b/arches/app/search/elasticsearch_dsl_builder.py @@ -505,6 +505,7 @@ def __init__(self, **kwargs): self.script = kwargs.pop("script", None) self.type = kwargs.pop("type", None) self.size = kwargs.pop("size", None) + self.filter = kwargs.pop("filter", None) # Extract 'filter' from kwargs if self.field is not None and self.script is not None: raise AggregationDSLException( @@ -517,23 +518,34 @@ def __init__(self, **kwargs): if self.type is None: raise AggregationDSLException(_("You need to specify an aggregation type")) - self.agg = {self.name: {self.type: {}}} + # Initialize the aggregation dictionary + self.agg = {self.name: {}} - if self.field is not None: - self.agg[self.name][self.type]["field"] = self.field - elif self.script is not None: - self.agg[self.name][self.type]["script"] = self.script + if self.type == "filter": + if self.filter is None: + raise AggregationDSLException( + _("You need to specify 'filter' for a filter aggregation") + ) + # For filter aggregation, place the filter content directly + self.agg[self.name][self.type] = self.filter + else: + self.agg[self.name][self.type] = {} - self.set_size(self.size) + if self.field is not None: + self.agg[self.name][self.type]["field"] = self.field + elif self.script is not None: + self.agg[self.name][self.type]["script"] = self.script - for key in kwargs: - self.agg[self.name][self.type][key] = kwargs.get(key, None) + self.set_size(self.size) + + # Set other keyword arguments + for key in kwargs: + self.agg[self.name][self.type][key] = kwargs.get(key, None) def add_aggregation(self, agg=None): if agg is not None: if "aggs" not in self.agg[self.name]: self.agg[self.name]["aggs"] = {} - self.agg[self.name]["aggs"][agg.name] = agg.agg[agg.name] def set_size(self, size): @@ -559,6 +571,18 @@ def __init__(self, **kwargs): self.agg[self.name][self.type]["precision"] = self.precision +class GeoTileGridAgg(Aggregation): + """ + https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-geotilegrid-aggregation.html + + """ + + def __init__(self, **kwargs): + self.precision = kwargs.get("precision", 5) + super(GeoTileGridAgg, self).__init__(type="geotile_grid", **kwargs) + self.agg[self.name][self.type]["precision"] = 
self.precision + + class GeoBoundsAgg(Aggregation): """ https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-geohashgrid-aggregation.html @@ -701,16 +725,14 @@ class NestedAgg(Aggregation): """ def __init__(self, **kwargs): - self.aggregation = kwargs.pop("agg", {}) + # self.aggregation = kwargs.pop("agg", {}) self.path = kwargs.pop("path", None) if self.path is None: raise NestedAggDSLException( _("You need to specify a path for your nested aggregation") ) - super(NestedAgg, self).__init__(type="nested", path=self.path, **kwargs) - - if self.name: - self.agg[self.name]["aggs"] = self.aggregation + super(NestedAgg, self).__init__(type="nested", **kwargs) + self.agg[self.name][self.type]["path"] = self.path class NestedAggDSLException(Exception): From 04e5fd6bdf3a3de93344951d5aae2bd3797ac44e Mon Sep 17 00:00:00 2001 From: Galen Date: Sun, 17 Nov 2024 20:32:26 -0800 Subject: [PATCH 11/14] commit map.js viewmodel changes --- arches/app/media/js/viewmodels/map.js | 319 ++++++++++---------------- 1 file changed, 124 insertions(+), 195 deletions(-) diff --git a/arches/app/media/js/viewmodels/map.js b/arches/app/media/js/viewmodels/map.js index d3f6dbf953..917f3752d8 100644 --- a/arches/app/media/js/viewmodels/map.js +++ b/arches/app/media/js/viewmodels/map.js @@ -10,235 +10,164 @@ define([ 'templates/views/components/map-popup.htm' ], function($, _, arches, ko, koMapping, mapPopupProvider, mapConfigurator, ariaUtils) { const viewModel = function(params) { - var self = this; - - const searchLayerIds = [ - 'searchtiles-unclustered-polygon-fill', - 'searchtiles-unclustered-point', - 'searchtiles-clusters', - 'searchtiles-clusters-halo', - 'searchtiles-cluster-count', - 'searchtiles-unclustered-polypoint' - ]; + const ZOOM_THRESHOLD = 14; + const self = this; + const searchLayerIds = ['clusters', 'cluster-count', 'unclustered-point', 'individual-points', 'individual-geometries']; + const searchLayerDefinitions = [ { - "id": "searchtiles-unclustered-polygon-fill", - "type": "fill", - "paint": { - "fill-color": "#fa6003", - "fill-opacity": 0.3, - "fill-outline-color": "#fa6003" - }, - "filter": [ - "==", - "$type", - "Polygon" - ], - "source": "search-layer-source", - "source-layer": "search_layer", - "minzoom": 10, - "tolerance": 0.75 + id: 'individual-points', + type: 'circle', + source: 'search-layer-source', + 'source-layer': 'points', + minzoom: ZOOM_THRESHOLD, + paint: { + 'circle-color': '#fa6003', + 'circle-radius': 5, + 'circle-opacity': 1 + } }, { - "id": "searchtiles-unclustered-point", - "type": "circle", - "paint": { - "circle-color": "#fa6003", - "circle-radius": 6, - "circle-opacity": 1 - }, - "filter": [ - "!", - [ - "has", - "point_count" - ] - ], - "source": "search-layer-source", - "source-layer": "search_layer" + id: 'individual-geometries', + type: 'fill', + source: 'search-layer-source', + 'source-layer': 'geometries', + minzoom: ZOOM_THRESHOLD-1, + paint: { + 'fill-color': '#fa6003', + 'fill-opacity': 0.3, + 'fill-outline-color': '#fa6003' + } }, { - "id": "searchtiles-clusters", - "type": "circle", - "paint": { - "circle-color": "#fa6003", - "circle-radius": [ - "step", - [ - "get", - "point_count" - ], - 10, - 100, - 20, - 750, - 30, - 1500, - 40, - 2500, - 50, - 5000, - 65 + "id": "clusters", + "type": "circle", + "source": "search-layer-source", + "source-layer": "clusters", + "filter": [ + "all", + ["has", "count"], + [">", ["get", "count"], 1] ], - "circle-opacity": [ - "case", - [ - "boolean", - [ - "has", - "point_count" + "paint": { 
+ "circle-color": "#fa6003", + "circle-radius": [ + "step", + ["get", "count"], + 15, + 10, 20, + 50, 25, + 100, 30, + 500, 35, + 1000, 40 ], - true - ], - 1, - 0 - ] - }, - "filter": [ - "all", - [ - "==", - "$type", - "Point" - ], - [ - "!=", - "highlight", - true - ] - ], - "source": "search-layer-source", - "source-layer": "search_layer" + "circle-opacity": 0.8 + }, + "maxzoom": ZOOM_THRESHOLD, + "minzoom": 1 }, { - "id": "searchtiles-clusters-halo", - "type": "circle", - "paint": { - "circle-color": "#fa6003", - "circle-radius": [ - "step", - [ - "get", - "point_count" - ], - 20, - 100, - 30, - 750, - 40, - 1500, - 50, - 2500, - 60, - 5000, - 75 - ], - "circle-opacity": [ - "case", - [ - "boolean", - [ - "has", - "point_count" - ], - true - ], - 0.5, - 0 - ] - }, - "filter": [ - "all", - [ - "==", - "$type", - "Point" + "id": "cluster-count", + "type": "symbol", + "source": "search-layer-source", + "source-layer": "clusters", + "filter": [ + "all", + ["has", "count"], + [">", ["get", "count"], 0] ], - [ - "!=", - "highlight", - true - ] - ], - "maxzoom": 14, - "source": "search-layer-source", - "source-layer": "search_layer" + "layout": { + "text-field": "{count}", + "text-font": ["DIN Offc Pro Medium", "Arial Unicode MS Bold"], + "text-size": 12 + }, + "paint": { + "text-color": "#ffffff" + }, + "maxzoom": ZOOM_THRESHOLD, + "minzoom": 1 }, { - "id": "searchtiles-cluster-count", - "type": "symbol", - "paint": { - "text-color": "#fff" - }, - "filter": [ - "has", - "point_count" - ], - "layout": { - "text-font": [ - "DIN Offc Pro Medium", - "Arial Unicode MS Bold" + "id": "unclustered-point", + "type": "circle", + "source": "search-layer-source", + "source-layer": "clusters", + "filter": [ + "all", + ["has", "count"], + ["==", ["get", "count"], 1] ], - "text-size": 14, - "text-field": "{point_count}" - }, - "maxzoom": 14, - "source": "search-layer-source", - "source-layer": "search_layer" - }, - { - "id": "searchtiles-unclustered-polypoint", - "type": "circle", - "paint": { - "circle-color": "#fa6003", - "circle-radius": 0, - "circle-opacity": 0, - "circle-stroke-color": "#fff", - "circle-stroke-width": 0 - }, - "filter": [ - "!", - [ - "has", - "point_count" - ] - ], - "layout": { - "visibility": "none" - }, - "source": "search-layer-source", - "source-layer": "search_layer" + "paint": { + "circle-color": "#fa6003", + "circle-radius": 5, + "circle-opacity": 1 + }, + minzoom: ZOOM_THRESHOLD } ]; - this.searchQueryId = params.searchQueryId; + this.searchQueryId = params.searchQueryId || ko.observable(); this.searchQueryId.subscribe(function (searchId) { if (searchId) { self.addSearchLayer(searchId); - } else { - // optionally, remove the search layer if searchId becomes undefined + } else if (ko.unwrap(self.map)) { self.removeSearchLayer(); } }); + this.addClusterClickHandlers = function() { + const map = self.map(); + + // Handle clicks on clusters + map.on('click', 'clusters', function(e) { + var features = map.queryRenderedFeatures(e.point, { layers: ['clusters'] }); + var feature = features[0]; + var count = feature.properties.count; + + if (count > 1) { + // Zoom in on the cluster + var coordinates = feature.geometry.coordinates.slice(); + map.easeTo({ + center: coordinates, + zoom: map.getZoom() + 2 + }); + } else { + // For count == 1, show a popup + self.onFeatureClick(features, e.lngLat, mapboxgl); + } + }); + + // Change the cursor to a pointer when over clusters and unclustered points + map.on('mouseenter', 'clusters', function() { + map.getCanvas().style.cursor = 'pointer'; + 
}); + map.on('mouseleave', 'clusters', function() { + map.getCanvas().style.cursor = ''; + }); + map.on('mouseenter', 'unclustered-point', function() { + map.getCanvas().style.cursor = 'pointer'; + }); + map.on('mouseleave', 'unclustered-point', function() { + map.getCanvas().style.cursor = ''; + }); + }; + + this.addSearchLayer = function (searchId) { console.log(searchId); if (!self.map()) return; - const tileUrlTemplate = `http://localhost:8000/search-layer/{z}/{x}/{y}.pbf?searchid=${encodeURIComponent(searchId)}`; - + const tileUrlTemplate = `${window.location.origin}/search-layer/{z}/{x}/{y}.pbf?searchid=${encodeURIComponent(searchId)}`; + // Remove existing source and layer if they exist searchLayerIds.forEach(layerId => { if (self.map().getLayer(layerId)) { self.map().removeLayer(layerId); } - if (self.map().getSource(layerId)) { - self.map().removeSource(layerId); - } }); if (self.map().getSource('search-layer-source')) { self.map().removeSource('search-layer-source'); } - + // Add the vector tile source self.map().addSource('search-layer-source', { type: 'vector', @@ -246,12 +175,13 @@ define([ minzoom: 0, maxzoom: 22, }); - + // Add the layer to display the data searchLayerDefinitions.forEach(mapLayer => { self.map().addLayer(mapLayer); }); + self.addClusterClickHandlers(); // Optionally, fit the map to the data bounds // self.fitMapToDataBounds(searchId); }; @@ -316,7 +246,6 @@ define([ if (ko.unwrap(params.bounds)) { map.fitBounds(ko.unwrap(params.bounds), boundingOptions); } - // If searchQueryId is already available, add the search layer if (self.searchQueryId()) { self.addSearchLayer(self.searchQueryId()); From 126a12d89516361367ffd1a5bc8490f2d476a87e Mon Sep 17 00:00:00 2001 From: Galen Date: Sun, 17 Nov 2024 20:40:10 -0800 Subject: [PATCH 12/14] commit latest changes to standard_search --- .../search/components/standard_search_view.py | 76 ++++++++----------- 1 file changed, 30 insertions(+), 46 deletions(-) diff --git a/arches/app/search/components/standard_search_view.py b/arches/app/search/components/standard_search_view.py index 59ac59f5fb..f3a7fc07c8 100644 --- a/arches/app/search/components/standard_search_view.py +++ b/arches/app/search/components/standard_search_view.py @@ -16,6 +16,7 @@ user_is_resource_exporter, ) from arches.app.utils.string_utils import get_str_kwarg_as_bool +from django.core.cache import cache from django.utils.translation import gettext as _ from datetime import datetime import logging @@ -169,53 +170,37 @@ def set_search_pit(self, search_query_object, se, cache, **kwargs): ) return resourceids_only_query_hash_key - def execute_resourceids_only_query( - self, search_query_object, response_object, cache, **kwargs - ): - # cached_response_json = cache.get(cache_key) - query_obj = kwargs.get("search_request_object", self.request.GET) + def execute_resourceids_only_query(self, search_query_object, cache, se, **kwargs): + search_request_object = kwargs.get("search_request_object", self.request.GET) resourceids_only_query_hash_key = create_searchresults_cache_key( - self.request, query_obj, resourceids_only=True + self.request, search_request_object, resourceids_only=True ) - # did we already cache result resourceids for this query under this query hash? 
- cached_result_resourceids = cache.get(resourceids_only_query_hash_key) - if ( - cached_result_resourceids - ): # we already did the work here; we'll return the hash key - return resourceids_only_query_hash_key - else: - print( - f"no cached resourceids for hashkey {resourceids_only_query_hash_key}" - ) - if resourceinstanceid is None: - results = search_query_object["query"].search( - index=RESOURCES_INDEX, limit=10000, scroll="1m" - ) - scroll_id = results["_scroll_id"] - scroll_size = results["hits"]["total"]["value"] - total_results = results["hits"]["total"]["value"] - if query_obj.get("paging-filter", None) is None: - while scroll_size > 0: - page = search_query_object["query"].se.es.scroll( - scroll_id=scroll_id, scroll="3m" - ) - scroll_size = len(page["hits"]["hits"]) - results["hits"]["hits"] += page["hits"]["hits"] - else: - results = search_query_object["query"].search( - index=RESOURCES_INDEX, id=resourceinstanceid - ) - total_results = 1 + hpla_idx = f"{settings.ELASTICSEARCH_PREFIX}_{RESOURCES_INDEX}" + pit_response = se.es.open_point_in_time( + index=hpla_idx, keep_alive="2m" # Adjust as needed + ) + pit_id = pit_response.get("id") + # Perform the search + search_query_object["query"].prepare() + query_dsl = search_query_object["query"].dsl + search_response = se.es.search( + pit={"id": pit_id, "keep_alive": "5m"}, _source=False, **query_dsl + ) - if results is not None: - all_resourceids = [hit["_id"] for hit in results["hits"]["hits"]] - cache.set( - resourceids_only_query_hash_key, - json.dumps(all_resourceids), - settings.SEARCH_RESULTS_CACHE_TIMEOUT, - ) - return resourceids_only_query_hash_key + # Cache the pit_id and search parameters + cache.set( + resourceids_only_query_hash_key + "_pit", + pit_id, + timeout=120, + ) + cache.set( + resourceids_only_query_hash_key + "_dsl", + search_query_object["query"].__str__(), + timeout=120, + ) + + return resourceids_only_query_hash_key def execute_query(self, search_query_object, response_object, **kwargs): for_export = get_str_kwarg_as_bool("export", self.request.GET) @@ -322,10 +307,9 @@ def handle_search_results_query( if not for_export: resourceids_only_query_hash_key = self.execute_resourceids_only_query( search_query_object, - response_object, cache, + se, search_request_object=sorted_query_obj, - resourceinstanceid=resourceinstanceid, ) # now I know the resourceids have been cached under the resourceids_only_query_hash_key @@ -357,7 +341,6 @@ def handle_search_results_query( search_query_object, response_object, search_request_object=sorted_query_obj, - resourceinstanceid=resourceinstanceid, ) for filter_type, querystring in list(sorted_query_obj.items()): @@ -366,6 +349,7 @@ def handle_search_results_query( search_filter.execute_query(search_query_object, response_object) if response_object["results"] is not None: + response_object["searchqueryid"] = resourceids_only_query_hash_key # allow filters to modify the results for filter_type, querystring in list(sorted_query_obj.items()): search_filter = search_filter_factory.get_filter(filter_type) From e0686282d4e537ebd4e77f1107abfd3d87cb4aa4 Mon Sep 17 00:00:00 2001 From: Galen Date: Sun, 17 Nov 2024 20:49:17 -0800 Subject: [PATCH 13/14] cleanup stndrd search changes --- .../search/components/standard_search_view.py | 102 ++++++------------ 1 file changed, 31 insertions(+), 71 deletions(-) diff --git a/arches/app/search/components/standard_search_view.py b/arches/app/search/components/standard_search_view.py index f3a7fc07c8..6efa8e5a30 100644 --- 
a/arches/app/search/components/standard_search_view.py +++ b/arches/app/search/components/standard_search_view.py @@ -1,5 +1,5 @@ from typing import Dict, Tuple - +import hashlib from arches.app.models.system_settings import settings from arches.app.search.components.base_search_view import BaseSearchView from arches.app.search.components.base import SearchFilterFactory @@ -138,38 +138,6 @@ def append_dsl(self, search_query_object, **kwargs): if load_tiles: search_query_object["query"].include("tiles") - def set_search_pit(self, search_query_object, se, cache, **kwargs): - query_obj = kwargs.get("search_request_object", self.request.GET) - resourceids_only_query_hash_key = create_searchresults_cache_key( - self.request, query_obj, resourceids_only=True - ) - pit_response = se.es.open_point_in_time( - index=RESOURCES_INDEX, keep_alive="2m" # Adjust as needed - ) - pit_id = pit_response["pit_id"] - - # Perform the search - search_params = { - # Your search query parameters - } - - search_response = search_query_object["query"].search( - index=RESOURCES_INDEX, - body=search_params, - pit={"id": pit_id, "keep_alive": "2m"}, - size=1000, # Adjust as needed - ) - # TODO: how can I cache the search query itself? The QueryObject is really hard to serialize - # could just re-instantiate the filters from the search_layer to regenerate the QueryObject from scratch - - # Cache the pit_id and search parameters - cache.set( - resourceids_only_query_hash_key, - json.dumps({"pit_id": pit_id, "search_params": search_params}), - timeout=120, - ) - return resourceids_only_query_hash_key - def execute_resourceids_only_query(self, search_query_object, cache, se, **kwargs): search_request_object = kwargs.get("search_request_object", self.request.GET) resourceids_only_query_hash_key = create_searchresults_cache_key( @@ -192,12 +160,12 @@ def execute_resourceids_only_query(self, search_query_object, cache, se, **kwarg cache.set( resourceids_only_query_hash_key + "_pit", pit_id, - timeout=120, + timeout=300, ) cache.set( resourceids_only_query_hash_key + "_dsl", search_query_object["query"].__str__(), - timeout=120, + timeout=300, ) return resourceids_only_query_hash_key @@ -298,11 +266,6 @@ def handle_search_results_query( if returnDsl: return response_object, search_query_object - # at this point we want to FIRST do an unlimited query to get all resourceids - # of the results - # THEN SECOND we want to do a second query to get a rich set of results only for the page - unpaged_query = None - search_query_object["query"].include("tiles") for_export = get_str_kwarg_as_bool("export", sorted_query_obj) if not for_export: resourceids_only_query_hash_key = self.execute_resourceids_only_query( @@ -312,37 +275,6 @@ def handle_search_results_query( search_request_object=sorted_query_obj, ) - # now I know the resourceids have been cached under the resourceids_only_query_hash_key - # I should set a start/end limit for the second query - paging_filter = search_filter_factory.get_filter("paging-filter") - if paging_filter: - paging_filter.append_dsl( - search_query_object, - permitted_nodegroups=permitted_nodegroups, - include_provisional=include_provisional, - load_tiles=load_tiles, - for_export=for_export, - querystring=sorted_query_obj.get("paging-filter", "{}"), - search_request_object=sorted_query_obj, - ) - - search_query_object["query"].include("graph_id") - # if geom_only or for_export or map_manager or load_tiles: - search_query_object["query"].include("geometries") - search_query_object["query"].include("points") - 
# if not geom_only:
-        for prop in essential_result_properties:
-            search_query_object["query"].include(prop)
-        # if load_tiles:
-        #     search_query_object["query"].include("tiles")
-        search_query_object["query"].include("resourceinstanceid")
-
-        self.execute_paged_query(
-            search_query_object,
-            response_object,
-            search_request_object=sorted_query_obj,
-        )
-
         for filter_type, querystring in list(sorted_query_obj.items()):
             search_filter = search_filter_factory.get_filter(filter_type)
             if search_filter:
@@ -368,3 +300,31 @@
                         response_object[key] = value

     return response_object, search_query_object
+
+
+def create_searchresults_cache_key(request, search_query, **kwargs):
+    """
+    method to create a hash cache key
+    blends a proxy for the current user with the search query so users never share cached results
+    also cleans/sorts the search query before converting it to a string in order to normalize it
+    kwargs:
+     - bool: resourceids_only - varies the key so resourceid-only caches stay distinct
+    """
+    resourceids_only = kwargs.get("resourceids_only", False)
+    user_proxy = (
+        request.user.username
+        if request.user.username == "anonymous"
+        else str(request.user.id)
+    )
+    search_query_string = "".join([k + str(v) for k, v in sorted(search_query.items())])

+    search_query_string = search_query_string.strip()
+
+    hashable_string = search_query_string + user_proxy
+    hashable_string += "rids" if resourceids_only else ""
+    b = bytearray()
+    b.extend(hashable_string.encode())
+    search_query_cache_key_hash = hashlib.sha1(b)
+    search_query_cache_key_hash = search_query_cache_key_hash.hexdigest()
+
+    return search_query_cache_key_hash

From 6b6813de7d75d0ab3be4cb0b6033a0a4d8cdcaaf Mon Sep 17 00:00:00 2001
From: Galen
Date: Sun, 17 Nov 2024 20:51:50 -0800
Subject: [PATCH 14/14] increase pit keepalive to 5m

---
 arches/app/search/components/standard_search_view.py | 2 +-
 arches/app/views/search_layer.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arches/app/search/components/standard_search_view.py b/arches/app/search/components/standard_search_view.py
index 6efa8e5a30..bde6e39e45 100644
--- a/arches/app/search/components/standard_search_view.py
+++ b/arches/app/search/components/standard_search_view.py
@@ -146,7 +146,7 @@ def execute_resourceids_only_query(self, search_query_object, cache, se, **kwarg
         hpla_idx = f"{settings.ELASTICSEARCH_PREFIX}_{RESOURCES_INDEX}"
         pit_response = se.es.open_point_in_time(
-            index=hpla_idx, keep_alive="2m"  # Adjust as needed
+            index=hpla_idx, keep_alive="5m"  # Adjust as needed
         )
         pit_id = pit_response.get("id")
         # Perform the search
diff --git a/arches/app/views/search_layer.py b/arches/app/views/search_layer.py
index 2db577a43e..1687e5eb6c 100644
--- a/arches/app/views/search_layer.py
+++ b/arches/app/views/search_layer.py
@@ -112,7 +112,7 @@ def get(self, request, zoom, x, y):
         # if int(y) == 203:
         #     pprint(new_query.dsl)
         results = se.es.search(
-            pit={"id": pit_id, "keep_alive": "2m"}, _source=False, **new_query.dsl
+            pit={"id": pit_id, "keep_alive": "5m"}, _source=False, **new_query.dsl
         )
         elapsed = time() - start
         # print(
@@ -170,7 +170,7 @@ def get(self, request, zoom, x, y):
         # Execute the search
         results = se.es.search(
-            pit={"id": pit_id, "keep_alive": "2m"}, **new_query.dsl
+            pit={"id": pit_id, "keep_alive": "5m"}, **new_query.dsl
         )
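
A minimal end-to-end sketch of how a client might exercise the endpoint this series adds. It is not part of the patches: the searchid value and tile coordinates are hypothetical, and it assumes a local dev server, the requests and mapbox_vector_tile packages, and that the search was run by the same (e.g. anonymous) user, since the cache key is user-specific.

    import requests
    import mapbox_vector_tile

    # hypothetical hash, as returned in the `searchqueryid` field of a
    # search-results response
    searchid = "0123456789abcdef0123456789abcdef01234567"
    url = f"http://localhost:8000/search-layer/4/2/5.pbf?searchid={searchid}"

    resp = requests.get(url)
    resp.raise_for_status()

    # decode the protobuf tile into {layer_name: {"features": [...], ...}}
    tile = mapbox_vector_tile.decode(resp.content)
    # below ZOOM_THRESHOLD (14) expect a "clusters" layer with a "count"
    # property per feature; at higher zooms, "points" and/or "geometries"
    for layer_name, layer in tile.items():
        print(layer_name, len(layer["features"]))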