-
Notifications
You must be signed in to change notification settings - Fork 25.8k
Fix reader context leak when query response serialization fails #144708
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 26 commits
ad168f7
0af5d96
7ced78f
7392231
c489112
38625df
31d17d1
faa8065
cbd6513
65d2576
e7201ab
28bb96d
2f13bfd
2f1268f
0b43c91
f3c906d
d91aab2
3a96957
b9c9d88
da2502e
c544194
c46290a
a318fff
422086d
bbb3b05
7a99b23
5ae4e33
5ffde5c
06a53c2
13be3b6
4d289a2
5337d43
8133e6e
a1a5383
b367d84
cf22a41
4fb1e87
5d58fda
acf0d40
d7d20a7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -156,6 +156,7 @@ | |
| import java.util.concurrent.atomic.AtomicBoolean; | ||
| import java.util.concurrent.atomic.AtomicInteger; | ||
| import java.util.concurrent.atomic.AtomicLong; | ||
| import java.util.concurrent.atomic.AtomicReference; | ||
| import java.util.function.BiFunction; | ||
| import java.util.function.Function; | ||
| import java.util.function.LongSupplier; | ||
|
|
@@ -754,11 +755,31 @@ public void executeQueryPhase(ShardSearchRequest request, CancellableTask task, | |
| } | ||
| // TODO: i think it makes sense to always do a canMatch here and | ||
| // return an empty response (not null response) in case canMatch is false? | ||
| ensureAfterSeqNoRefreshed(shard, orig, () -> executeQueryPhase(orig, task), l); | ||
| executeQueryPhaseAsync(shard, orig, task, l); | ||
| }) | ||
| ); | ||
| } | ||
|
|
||
| private void executeQueryPhaseAsync( | ||
| IndexShard shard, | ||
| ShardSearchRequest request, | ||
| CancellableTask task, | ||
| ActionListener<SearchPhaseResult> listener | ||
| ) { | ||
| // wrapFailureListener requires readerContext and markAsUsed, but those are created inside the supplier | ||
| // lambda below. The ActionListener.wrap callbacks are constructed (before the supplier runs) and must | ||
| // therefore read the listener indirectly. completionListenerRef starts as the plain listener so that any | ||
| // failure before the supplier runs is still forwarded. Once the supplier sets it to the wrapped version, | ||
| // the ActionListener.wrap callbacks will invoke the one that handles readerContext cleanup on failure. | ||
| final var completionListenerRef = new AtomicReference<>(listener); | ||
| ensureAfterSeqNoRefreshed(shard, request, () -> { | ||
| final ReaderContext readerContext = createOrGetReaderContext(request); | ||
| final Releasable markAsUsed = readerContext.markAsUsed(getKeepAlive(request)); | ||
| completionListenerRef.set(wrapFailureListener(listener, readerContext, markAsUsed)); | ||
| return executeQueryPhase(request, task, readerContext); | ||
| }, ActionListener.wrap(result -> completionListenerRef.get().onResponse(result), e -> completionListenerRef.get().onFailure(e))); | ||
| } | ||
|
|
||
| private <T extends RefCounted> void ensureAfterSeqNoRefreshed( | ||
| IndexShard shard, | ||
| ShardSearchRequest request, | ||
|
|
@@ -908,11 +929,10 @@ private static <T extends RefCounted> void runAsync( | |
| * It is the responsibility of the caller to ensure that the ref count is correctly decremented | ||
| * when the object is no longer needed. | ||
| */ | ||
| private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, CancellableTask task) throws Exception { | ||
| final ReaderContext readerContext = createOrGetReaderContext(request); | ||
| private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, CancellableTask task, ReaderContext readerContext) | ||
| throws Exception { | ||
| try ( | ||
| Releasable scope = tracer.withScope(task); | ||
| Releasable ignored = readerContext.markAsUsed(getKeepAlive(request)); | ||
| SearchContext context = createContext(readerContext, request, task, ResultsType.QUERY, true) | ||
| ) { | ||
| tracer.startTrace("executeQueryPhase", Map.of()); | ||
|
|
@@ -967,7 +987,6 @@ private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, Cancella | |
| : new ElasticsearchException(e.getCause()); | ||
| } | ||
| logger.trace("Query phase failed", e); | ||
| processFailure(readerContext, e); | ||
| throw e; | ||
| } | ||
| } | ||
|
|
@@ -1651,36 +1670,36 @@ private <T> ActionListener<T> releaseCircuitBreakerOnResponse( | |
| ActionListener<T> listener, | ||
| Function<T, FetchSearchResult> fetchResultExtractor | ||
| ) { | ||
| return ActionListener.wrap(response -> { | ||
| try { | ||
| listener.onResponse(response); | ||
| } finally { | ||
| // Release bytes after the response handler completes | ||
| FetchSearchResult fetchResult = fetchResultExtractor.apply(response); | ||
| if (fetchResult != null) { | ||
| fetchResult.releaseCircuitBreakerBytes(circuitBreaker); | ||
| } | ||
| } | ||
| }, listener::onFailure); | ||
| } | ||
|
|
||
| private <T> ActionListener<T> wrapFailureListener(ActionListener<T> listener, ReaderContext context, Releasable releasable) { | ||
| return new ActionListener<>() { | ||
| @Override | ||
| public void onResponse(T resp) { | ||
| Releasables.close(releasable); | ||
| listener.onResponse(resp); | ||
| public void onResponse(T response) { | ||
| try { | ||
| listener.onResponse(response); | ||
| } finally { | ||
| // Release bytes after the response handler completes, even if it throws. | ||
| // Exceptions are intentionally allowed to propagate so that wrapFailureListener | ||
| // can observe them and free the reader context via processFailure. | ||
| FetchSearchResult fetchResult = fetchResultExtractor.apply(response); | ||
| if (fetchResult != null) { | ||
| fetchResult.releaseCircuitBreakerBytes(circuitBreaker); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| public void onFailure(Exception exc) { | ||
| processFailure(context, exc); | ||
| Releasables.close(releasable); | ||
| listener.onFailure(exc); | ||
| public void onFailure(Exception e) { | ||
| listener.onFailure(e); | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| private <T> ActionListener<T> wrapFailureListener(ActionListener<T> listener, ReaderContext context, Releasable releasable) { | ||
| return ActionListener.releaseAfter(ActionListener.wrap(listener::onResponse, e -> { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. perhaps a personal flaw but I find it very difficult to understand what we mean by Can you please document a bit the method and code? Would it make sense to make this method package visible and unit test it?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you, Andrei, for the feedback. Agreed, I documented the code and added unit tests by making the method package visible. |
||
| processFailure(context, e); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are we in trouble if processFailure fails?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In practice, |
||
| listener.onFailure(e); | ||
| }), releasable); | ||
| } | ||
|
|
||
| private static boolean isScrollContext(ReaderContext context) { | ||
| return context instanceof LegacyReaderContext && context.singleSession() == false; | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this code sequence is hard to read/understand for me with the atomicreference setting and the access in the next argument. Do you have any idea for simplification?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I’ve added a comment to clarify the understanding of the code and the need of the completionListenerRef