-
Notifications
You must be signed in to change notification settings - Fork 25.9k
Add retriever to the query rewrite phase #110482
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7aa49d1
83a0fe4
ae28c7b
aa09fb2
f127e7b
165604d
c4d6750
3762bdd
e8f00b7
a370cac
29266a7
a75e885
cc6204a
c6682ec
effb879
1700df5
a52e530
46689a3
910ce92
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,264 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the Elastic License | ||
| * 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
| * in compliance with, at your election, the Elastic License 2.0 or the Server | ||
| * Side Public License, v 1. | ||
| */ | ||
|
|
||
| package org.elasticsearch.search.retriever; | ||
|
|
||
| import org.apache.lucene.search.TotalHits; | ||
| import org.apache.lucene.util.SetOnce; | ||
| import org.elasticsearch.action.ActionListener; | ||
| import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; | ||
| import org.elasticsearch.action.search.SearchPhaseExecutionException; | ||
| import org.elasticsearch.action.search.SearchRequest; | ||
| import org.elasticsearch.action.search.SearchRequestBuilder; | ||
| import org.elasticsearch.action.search.SearchResponse; | ||
| import org.elasticsearch.cluster.health.ClusterHealthStatus; | ||
| import org.elasticsearch.cluster.node.DiscoveryNode; | ||
| import org.elasticsearch.cluster.node.DiscoveryNodeRole; | ||
| import org.elasticsearch.common.Priority; | ||
| import org.elasticsearch.common.settings.Settings; | ||
| import org.elasticsearch.index.query.QueryBuilders; | ||
| import org.elasticsearch.index.query.QueryRewriteContext; | ||
| import org.elasticsearch.plugins.Plugin; | ||
| import org.elasticsearch.search.MockSearchService; | ||
| import org.elasticsearch.search.builder.SearchSourceBuilder; | ||
| import org.elasticsearch.test.ESIntegTestCase; | ||
| import org.elasticsearch.test.hamcrest.ElasticsearchAssertions; | ||
| import org.elasticsearch.xcontent.XContentBuilder; | ||
| import org.junit.Before; | ||
|
|
||
| import java.io.IOException; | ||
| import java.util.Collection; | ||
| import java.util.List; | ||
|
|
||
| import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; | ||
| import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; | ||
| import static org.hamcrest.Matchers.containsString; | ||
| import static org.hamcrest.Matchers.equalTo; | ||
|
|
||
| @ESIntegTestCase.ClusterScope(numDataNodes = 3) | ||
| public class RetrieverRewriteIT extends ESIntegTestCase { | ||
| @Override | ||
| protected Collection<Class<? extends Plugin>> nodePlugins() { | ||
| return List.of(MockSearchService.TestPlugin.class); | ||
| } | ||
|
|
||
| private static String INDEX_DOCS = "docs"; | ||
| private static String INDEX_QUERIES = "queries"; | ||
| private static final String ID_FIELD = "_id"; | ||
| private static final String QUERY_FIELD = "query"; | ||
|
|
||
| @Before | ||
| public void setup() throws Exception { | ||
| createIndex(INDEX_DOCS); | ||
| index(INDEX_DOCS, "doc_0", "{}"); | ||
| index(INDEX_DOCS, "doc_1", "{}"); | ||
| index(INDEX_DOCS, "doc_2", "{}"); | ||
| refresh(INDEX_DOCS); | ||
|
|
||
| createIndex(INDEX_QUERIES); | ||
| index(INDEX_QUERIES, "query_0", "{ \"" + QUERY_FIELD + "\": \"doc_2\"}"); | ||
| index(INDEX_QUERIES, "query_1", "{ \"" + QUERY_FIELD + "\": \"doc_1\"}"); | ||
| index(INDEX_QUERIES, "query_2", "{ \"" + QUERY_FIELD + "\": \"doc_0\"}"); | ||
| refresh(INDEX_QUERIES); | ||
| } | ||
|
|
||
| public void testRewrite() { | ||
| SearchSourceBuilder source = new SearchSourceBuilder(); | ||
| StandardRetrieverBuilder standard = new StandardRetrieverBuilder(); | ||
| standard.queryBuilder = QueryBuilders.termQuery(ID_FIELD, "doc_0"); | ||
| source.retriever(new AssertingRetrieverBuilder(standard)); | ||
| SearchRequestBuilder req = client().prepareSearch(INDEX_DOCS, INDEX_QUERIES).setSource(source); | ||
| ElasticsearchAssertions.assertResponse(req, resp -> { | ||
| assertNull(resp.pointInTimeId()); | ||
| assertNotNull(resp.getHits().getTotalHits()); | ||
| assertThat(resp.getHits().getTotalHits().value, equalTo(1L)); | ||
| assertThat(resp.getHits().getTotalHits().relation, equalTo(TotalHits.Relation.EQUAL_TO)); | ||
| assertThat(resp.getHits().getAt(0).getId(), equalTo("doc_0")); | ||
| }); | ||
| } | ||
|
|
||
| public void testRewriteCompound() { | ||
| SearchSourceBuilder source = new SearchSourceBuilder(); | ||
| source.retriever(new AssertingCompoundRetrieverBuilder("query_0")); | ||
| SearchRequestBuilder req = client().prepareSearch(INDEX_DOCS, INDEX_QUERIES).setSource(source); | ||
| ElasticsearchAssertions.assertResponse(req, resp -> { | ||
| assertNull(resp.pointInTimeId()); | ||
| assertNotNull(resp.getHits().getTotalHits()); | ||
| assertThat(resp.getHits().getTotalHits().value, equalTo(1L)); | ||
| assertThat(resp.getHits().getTotalHits().relation, equalTo(TotalHits.Relation.EQUAL_TO)); | ||
| assertThat(resp.getHits().getAt(0).getId(), equalTo("doc_2")); | ||
| }); | ||
| } | ||
|
|
||
| public void testRewriteCompoundRetrieverShouldThrowForPartialResults() throws Exception { | ||
| final String testIndex = "test"; | ||
| createIndex(testIndex, Settings.builder().put(SETTING_NUMBER_OF_SHARDS, 3).put(SETTING_NUMBER_OF_REPLICAS, 0).build()); | ||
| for (int i = 0; i < 50; i++) { | ||
| index(testIndex, "doc_" + i, "{}"); | ||
| } | ||
| refresh(testIndex); | ||
|
|
||
| SearchSourceBuilder source = new SearchSourceBuilder(); | ||
| source.retriever(new AssertingCompoundRetrieverBuilder("doc_0")); | ||
| final String randomDataNode = internalCluster().getNodeNameThat( | ||
| settings -> DiscoveryNode.hasRole(settings, DiscoveryNodeRole.DATA_ROLE) | ||
| ); | ||
| try { | ||
| ensureGreen(testIndex); | ||
| if (false == internalCluster().stopNode(randomDataNode)) { | ||
| throw new IllegalStateException("node did not stop"); | ||
| } | ||
| assertBusy(() -> { | ||
| ClusterHealthResponse healthResponse = clusterAdmin().prepareHealth(testIndex) | ||
| .setWaitForStatus(ClusterHealthStatus.RED) // we are now known red because the primary shard is missing | ||
| .setWaitForEvents(Priority.LANGUID) // ensures that the update has occurred | ||
| .execute() | ||
| .actionGet(); | ||
| assertThat(healthResponse.getStatus(), equalTo(ClusterHealthStatus.RED)); | ||
| }); | ||
| SearchPhaseExecutionException ex = expectThrows( | ||
| SearchPhaseExecutionException.class, | ||
| client().prepareSearch(testIndex).setSource(source)::get | ||
| ); | ||
| assertThat( | ||
| ex.getDetailedMessage(), | ||
| containsString("[open_point_in_time] action requires all shards to be available. Missing shards") | ||
| ); | ||
| } finally { | ||
| internalCluster().restartNode(randomDataNode); | ||
| } | ||
| } | ||
|
|
||
| private static class AssertingRetrieverBuilder extends RetrieverBuilder { | ||
| private final RetrieverBuilder innerRetriever; | ||
|
|
||
| private AssertingRetrieverBuilder(RetrieverBuilder innerRetriever) { | ||
| this.innerRetriever = innerRetriever; | ||
| } | ||
|
|
||
| @Override | ||
| public RetrieverBuilder rewrite(QueryRewriteContext ctx) throws IOException { | ||
| assertNull(ctx.getPointInTimeBuilder()); | ||
| assertNull(ctx.convertToInnerHitsRewriteContext()); | ||
| assertNull(ctx.convertToCoordinatorRewriteContext()); | ||
| assertNull(ctx.convertToIndexMetadataContext()); | ||
| assertNull(ctx.convertToSearchExecutionContext()); | ||
| assertNull(ctx.convertToDataRewriteContext()); | ||
| var newRetriever = innerRetriever.rewrite(ctx); | ||
| if (newRetriever != innerRetriever) { | ||
| return new AssertingRetrieverBuilder(newRetriever); | ||
| } | ||
| return this; | ||
| } | ||
|
|
||
| @Override | ||
| public void extractToSearchSourceBuilder(SearchSourceBuilder sourceBuilder, boolean compoundUsed) { | ||
| assertNull(sourceBuilder.retriever()); | ||
| innerRetriever.extractToSearchSourceBuilder(sourceBuilder, compoundUsed); | ||
| } | ||
|
|
||
| @Override | ||
| public String getName() { | ||
| return "asserting"; | ||
| } | ||
|
|
||
| @Override | ||
| protected void doToXContent(XContentBuilder builder, Params params) throws IOException {} | ||
|
|
||
| @Override | ||
| protected boolean doEquals(Object o) { | ||
| return false; | ||
| } | ||
|
|
||
| @Override | ||
| protected int doHashCode() { | ||
| return innerRetriever.doHashCode(); | ||
| } | ||
| } | ||
|
|
||
| private static class AssertingCompoundRetrieverBuilder extends RetrieverBuilder { | ||
| private final String id; | ||
| private final SetOnce<RetrieverBuilder> innerRetriever; | ||
|
|
||
| private AssertingCompoundRetrieverBuilder(String id) { | ||
| this.id = id; | ||
| this.innerRetriever = new SetOnce<>(null); | ||
| } | ||
|
|
||
| private AssertingCompoundRetrieverBuilder(String id, SetOnce<RetrieverBuilder> innerRetriever) { | ||
| this.id = id; | ||
| this.innerRetriever = innerRetriever; | ||
| } | ||
|
|
||
| @Override | ||
| public boolean isCompound() { | ||
| return true; | ||
| } | ||
|
|
||
| @Override | ||
| public RetrieverBuilder rewrite(QueryRewriteContext ctx) throws IOException { | ||
| assertNotNull(ctx.getPointInTimeBuilder()); | ||
| assertNull(ctx.convertToInnerHitsRewriteContext()); | ||
| assertNull(ctx.convertToCoordinatorRewriteContext()); | ||
| assertNull(ctx.convertToIndexMetadataContext()); | ||
| assertNull(ctx.convertToSearchExecutionContext()); | ||
| assertNull(ctx.convertToDataRewriteContext()); | ||
| if (innerRetriever.get() != null) { | ||
| return this; | ||
| } | ||
| SetOnce<RetrieverBuilder> innerRetriever = new SetOnce<>(); | ||
| ctx.registerAsyncAction((client, actionListener) -> { | ||
| SearchSourceBuilder source = new SearchSourceBuilder().pointInTimeBuilder(ctx.getPointInTimeBuilder()) | ||
| .query(QueryBuilders.termQuery(ID_FIELD, id)) | ||
| .fetchField(QUERY_FIELD); | ||
| client.search(new SearchRequest().source(source), new ActionListener<>() { | ||
| @Override | ||
| public void onResponse(SearchResponse response) { | ||
| String query = response.getHits().getAt(0).field(QUERY_FIELD).getValue(); | ||
|
jimczi marked this conversation as resolved.
|
||
| StandardRetrieverBuilder standard = new StandardRetrieverBuilder(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assume in the main code we want retrievers to be rewritten into some kind of ScoreDocQuery, but not to another retriever?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Retrievers will always rewrite to a retriever, that's enforced by the design. Using some kind of ScoreDocQuery will be internally done by the rewritten retriever when |
||
| standard.queryBuilder = QueryBuilders.termQuery(ID_FIELD, query); | ||
| innerRetriever.set(standard); | ||
| actionListener.onResponse(null); | ||
| } | ||
|
|
||
| @Override | ||
| public void onFailure(Exception e) { | ||
| actionListener.onFailure(e); | ||
| } | ||
| }); | ||
| }); | ||
| return new AssertingCompoundRetrieverBuilder(id, innerRetriever); | ||
| } | ||
|
|
||
| @Override | ||
| public void extractToSearchSourceBuilder(SearchSourceBuilder sourceBuilder, boolean compoundUsed) { | ||
| assertNull(sourceBuilder.retriever()); | ||
| innerRetriever.get().extractToSearchSourceBuilder(sourceBuilder, compoundUsed); | ||
| } | ||
|
|
||
| @Override | ||
| public String getName() { | ||
| return "asserting"; | ||
|
jimczi marked this conversation as resolved.
|
||
| } | ||
|
|
||
| @Override | ||
| protected void doToXContent(XContentBuilder builder, Params params) throws IOException { | ||
| throw new AssertionError("not implemented"); | ||
| } | ||
|
|
||
| @Override | ||
| protected boolean doEquals(Object o) { | ||
| return false; | ||
| } | ||
|
|
||
| @Override | ||
| protected int doHashCode() { | ||
| return id.hashCode(); | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -709,7 +709,9 @@ public void sendSearchResponse(SearchResponseSections internalSearchResponse, At | |
| if (buildPointInTimeFromSearchResults()) { | ||
| searchContextId = SearchContextId.encode(queryResults.asList(), aliasFilter, minTransportVersion); | ||
| } else { | ||
| if (request.source() != null && request.source().pointInTimeBuilder() != null) { | ||
| if (request.source() != null | ||
| && request.source().pointInTimeBuilder() != null | ||
| && request.source().pointInTimeBuilder().singleSession() == false) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it safe to rely on the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is just about returning the pit id in the response. The logic to release the pit automatically is restricted to the retriever builder case. It might be surprising for existing pit users though since -1 is a valid keep-alive value. We should be able to restrict this behavior to retrievers entirely. |
||
| searchContextId = request.source().pointInTimeBuilder().getEncodedId(); | ||
| } else { | ||
| searchContextId = null; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -320,9 +320,10 @@ public void writeTo(StreamOutput out) throws IOException { | |
| public ActionRequestValidationException validate() { | ||
| ActionRequestValidationException validationException = null; | ||
| boolean scroll = scroll() != null; | ||
| boolean allowPartialSearchResults = allowPartialSearchResults() != null && allowPartialSearchResults(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ❤️ |
||
|
|
||
| if (source != null) { | ||
| validationException = source.validate(validationException, scroll); | ||
| validationException = source.validate(validationException, scroll, allowPartialSearchResults); | ||
| } | ||
| if (scroll) { | ||
| if (requestCache != null && requestCache) { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.