-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Graph: fix race condition in timeout #88946
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| pr: 88946 | ||
| summary: "Graph: fix race condition in timeout" | ||
| area: Graph | ||
| type: bug | ||
| issues: | ||
| - 55396 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,6 +6,8 @@ | |
| */ | ||
| package org.elasticsearch.xpack.graph.action; | ||
|
|
||
| import org.apache.logging.log4j.LogManager; | ||
| import org.apache.logging.log4j.Logger; | ||
| import org.apache.lucene.search.BooleanQuery; | ||
| import org.apache.lucene.util.BytesRef; | ||
| import org.apache.lucene.util.PriorityQueue; | ||
|
|
@@ -61,13 +63,13 @@ | |
| import java.util.Set; | ||
| import java.util.SortedSet; | ||
| import java.util.TreeSet; | ||
| import java.util.concurrent.atomic.AtomicBoolean; | ||
|
|
||
| /** | ||
| * Performs a series of elasticsearch queries and aggregations to explore | ||
| * connected terms in a single index. | ||
| */ | ||
| public class TransportGraphExploreAction extends HandledTransportAction<GraphExploreRequest, GraphExploreResponse> { | ||
| private static final Logger logger = LogManager.getLogger(TransportGraphExploreAction.class); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
|
|
||
| private final ThreadPool threadPool; | ||
| private final NodeClient client; | ||
|
|
@@ -115,7 +117,6 @@ class AsyncGraphAction { | |
| private final ActionListener<GraphExploreResponse> listener; | ||
|
|
||
| private final long startTime; | ||
| private final AtomicBoolean timedOut; | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was pretty unnecessary. We only ever set it one time so I just pass it in. |
||
| private volatile ShardOperationFailedException[] shardFailures; | ||
| private Map<VertexId, Vertex> vertices = new HashMap<>(); | ||
| private Map<ConnectionId, Connection> connections = new HashMap<>(); | ||
|
|
@@ -128,7 +129,6 @@ class AsyncGraphAction { | |
| this.request = request; | ||
| this.listener = listener; | ||
| this.startTime = threadPool.relativeTimeInMillis(); | ||
| this.timedOut = new AtomicBoolean(false); | ||
| this.shardFailures = ShardSearchFailure.EMPTY_ARRAY; | ||
| } | ||
|
|
||
|
|
@@ -173,16 +173,11 @@ private void removeVertex(Vertex vertex) { | |
| * connections | ||
| */ | ||
| synchronized void expand() { | ||
| if (hasTimedOut()) { | ||
| timedOut.set(true); | ||
| listener.onResponse(buildResponse()); | ||
| return; | ||
| } | ||
| Map<String, Set<Vertex>> lastHopFindings = hopFindings.get(currentHopNumber); | ||
| if ((currentHopNumber >= (request.getHopNumbers() - 1)) || (lastHopFindings == null) || (lastHopFindings.size() == 0)) { | ||
| // Either we gathered no leads from the last hop or we have | ||
| // reached the final hop | ||
| listener.onResponse(buildResponse()); | ||
| listener.onResponse(buildResponse(false)); | ||
| return; | ||
| } | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here's the bit where we return the response even if we're over time. If we're done we may as well return anyway, I say.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is fine for me |
||
| Hop lastHop = request.getHop(currentHopNumber); | ||
|
|
@@ -318,16 +313,22 @@ synchronized void expand() { | |
| // Execute the search | ||
| SearchSourceBuilder source = new SearchSourceBuilder().query(rootBool).aggregation(sampleAgg).size(0); | ||
| if (request.timeout() != null) { | ||
| source.timeout(TimeValue.timeValueMillis(timeRemainingMillis())); | ||
| // Actual resolution of timer is granularity of the interval | ||
| // configured globally for updating estimated time. | ||
| long timeRemainingMillis = startTime + request.timeout().millis() - threadPool.relativeTimeInMillis(); | ||
| if (timeRemainingMillis <= 0) { | ||
| listener.onResponse(buildResponse(true)); | ||
| return; | ||
| } | ||
|
|
||
| source.timeout(TimeValue.timeValueMillis(timeRemainingMillis)); | ||
| } | ||
| searchRequest.source(source); | ||
|
|
||
| // System.out.println(source); | ||
| logger.trace("executing expansion graph search request"); | ||
| client.search(searchRequest, new ActionListener.Delegating<>(listener) { | ||
| @Override | ||
| public void onResponse(SearchResponse searchResponse) { | ||
| // System.out.println(searchResponse); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These just seemed like leftovers. I figure we'd dump these into the |
||
| addShardFailures(searchResponse.getShardFailures()); | ||
|
|
||
| ArrayList<Connection> newConnections = new ArrayList<Connection>(); | ||
|
|
@@ -676,7 +677,6 @@ public synchronized void start() { | |
| source.timeout(request.timeout()); | ||
| } | ||
| searchRequest.source(source); | ||
| // System.out.println(source); | ||
| logger.trace("executing initial graph search request"); | ||
| client.search(searchRequest, new ActionListener.Delegating<>(listener) { | ||
| @Override | ||
|
|
@@ -774,16 +774,6 @@ private void addNormalizedBoosts(BoolQueryBuilder includesContainer, VertexReque | |
| } | ||
| } | ||
|
|
||
| boolean hasTimedOut() { | ||
| return request.timeout() != null && (timeRemainingMillis() <= 0); | ||
| } | ||
|
|
||
| long timeRemainingMillis() { | ||
| // Actual resolution of timer is granularity of the interval | ||
| // configured globally for updating estimated time. | ||
| return (startTime + request.timeout().millis()) - threadPool.relativeTimeInMillis(); | ||
| } | ||
|
|
||
| void addShardFailures(ShardOperationFailedException[] failures) { | ||
| if (CollectionUtils.isEmpty(failures) == false) { | ||
| ShardOperationFailedException[] duplicates = new ShardOperationFailedException[shardFailures.length + failures.length]; | ||
|
|
@@ -793,9 +783,9 @@ void addShardFailures(ShardOperationFailedException[] failures) { | |
| } | ||
| } | ||
|
|
||
| protected GraphExploreResponse buildResponse() { | ||
| protected GraphExploreResponse buildResponse(boolean timedOut) { | ||
| long took = threadPool.relativeTimeInMillis() - startTime; | ||
| return new GraphExploreResponse(took, timedOut.get(), shardFailures, vertices, connections, request.returnDetailedInfo()); | ||
| return new GraphExploreResponse(took, timedOut, shardFailures, vertices, connections, request.returnDetailedInfo()); | ||
| } | ||
|
|
||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had to move the script query causing the timeout to the hop before the last hop because we no longer check the timeout on the final response. If we get a full response from the query we return it even if we're above the timeout time.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I figured that was fine because it should be quite fast.