Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Query-level resource usages tracking #13172

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Remote Store] Add support to disable flush based on translog reader count ([#14027](https://github.com/opensearch-project/OpenSearch/pull/14027))
- [Query Insights] Add exporter support for top n queries ([#12982](https://github.com/opensearch-project/OpenSearch/pull/12982))
- [Query Insights] Add X-Opaque-Id to search request metadata for top n queries ([#13374](https://github.com/opensearch-project/OpenSearch/pull/13374))
- Add support for query level resource usage tracking ([#13172](https://github.com/opensearch-project/OpenSearch/pull/13172))

### Dependencies
- Bump `com.github.spullara.mustache.java:compiler` from 0.9.10 to 0.9.13 ([#13329](https://github.com/opensearch-project/OpenSearch/pull/13329), [#13559](https://github.com/opensearch-project/OpenSearch/pull/13559))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ public long getTotalValue() {
return endValue.get() - startValue;
}

public long getStartValue() {
return startValue;
}

@Override
public String toString() {
return String.valueOf(getTotalValue());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.core.tasks.resourcetracker;

import org.opensearch.common.annotation.PublicApi;
import org.opensearch.core.ParseField;
import org.opensearch.core.common.Strings;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.common.io.stream.Writeable;
import org.opensearch.core.xcontent.ConstructingObjectParser;
import org.opensearch.core.xcontent.MediaTypeRegistry;
import org.opensearch.core.xcontent.ToXContentObject;
import org.opensearch.core.xcontent.XContentBuilder;

import java.io.IOException;
import java.util.Objects;

import static org.opensearch.core.xcontent.ConstructingObjectParser.constructorArg;

/**
* Task resource usage information with minimal information about the task
* <p>
* Writeable TaskResourceInfo objects are used to represent resource usage
* information of running tasks, which can be propagated to coordinator node
* to infer query-level resource usage
*
* @opensearch.api
*/
@PublicApi(since = "2.15.0")
public class TaskResourceInfo implements Writeable, ToXContentObject {
private final String action;
private final long taskId;
private final long parentTaskId;
private final String nodeId;
private final TaskResourceUsage taskResourceUsage;

private static final ParseField ACTION = new ParseField("action");
private static final ParseField TASK_ID = new ParseField("taskId");
private static final ParseField PARENT_TASK_ID = new ParseField("parentTaskId");
private static final ParseField NODE_ID = new ParseField("nodeId");
private static final ParseField TASK_RESOURCE_USAGE = new ParseField("taskResourceUsage");

public TaskResourceInfo(
final String action,
final long taskId,
final long parentTaskId,
final String nodeId,
final TaskResourceUsage taskResourceUsage
) {
this.action = action;
this.taskId = taskId;
this.parentTaskId = parentTaskId;
this.nodeId = nodeId;
this.taskResourceUsage = taskResourceUsage;
}

public static final ConstructingObjectParser<TaskResourceInfo, Void> PARSER = new ConstructingObjectParser<>(
"task_resource_info",
a -> new Builder().setAction((String) a[0])
.setTaskId((Long) a[1])
.setParentTaskId((Long) a[2])
.setNodeId((String) a[3])
.setTaskResourceUsage((TaskResourceUsage) a[4])
.build()
);

static {
PARSER.declareString(constructorArg(), ACTION);
PARSER.declareLong(constructorArg(), TASK_ID);
PARSER.declareLong(constructorArg(), PARENT_TASK_ID);
PARSER.declareString(constructorArg(), NODE_ID);
PARSER.declareObject(constructorArg(), TaskResourceUsage.PARSER, TASK_RESOURCE_USAGE);
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(ACTION.getPreferredName(), this.action);
builder.field(TASK_ID.getPreferredName(), this.taskId);
builder.field(PARENT_TASK_ID.getPreferredName(), this.parentTaskId);
builder.field(NODE_ID.getPreferredName(), this.nodeId);
builder.startObject(TASK_RESOURCE_USAGE.getPreferredName());
this.taskResourceUsage.toXContent(builder, params);
msfroh marked this conversation as resolved.
Show resolved Hide resolved
builder.endObject();
ansjcy marked this conversation as resolved.
Show resolved Hide resolved
builder.endObject();
return builder;
}

/**
* Builder for {@link TaskResourceInfo}
*/
public static class Builder {
private TaskResourceUsage taskResourceUsage;
private String action;
private long taskId;
msfroh marked this conversation as resolved.
Show resolved Hide resolved
private long parentTaskId;
private String nodeId;

public Builder setTaskResourceUsage(final TaskResourceUsage taskResourceUsage) {
this.taskResourceUsage = taskResourceUsage;
return this;
}

public Builder setAction(final String action) {
this.action = action;
return this;
}

public Builder setTaskId(final long taskId) {
this.taskId = taskId;
return this;
}

public Builder setParentTaskId(final long parentTaskId) {
this.parentTaskId = parentTaskId;
return this;
}

public Builder setNodeId(final String nodeId) {
this.nodeId = nodeId;
return this;
}

public TaskResourceInfo build() {
return new TaskResourceInfo(action, taskId, parentTaskId, nodeId, taskResourceUsage);
}
}

/**
* Read task info from a stream.
*
* @param in StreamInput to read
* @return {@link TaskResourceInfo}
* @throws IOException IOException
*/
public static TaskResourceInfo readFromStream(StreamInput in) throws IOException {
return new TaskResourceInfo.Builder().setAction(in.readString())
.setTaskId(in.readLong())
.setParentTaskId(in.readLong())
.setNodeId(in.readString())
.setTaskResourceUsage(TaskResourceUsage.readFromStream(in))
.build();
}

/**
* Get TaskResourceUsage
*
* @return taskResourceUsage
*/
public TaskResourceUsage getTaskResourceUsage() {
return taskResourceUsage;
}

/**
* Get parent task id
*
* @return parent task id
*/
public long getParentTaskId() {
return parentTaskId;
}

/**
* Get task id
* @return task id
*/
public long getTaskId() {
return taskId;
}

/**
* Get node id
* @return node id
*/
public String getNodeId() {
return nodeId;
}

/**
* Get task action
* @return task action
*/
public String getAction() {
return action;
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(action);
out.writeLong(taskId);
out.writeLong(parentTaskId);
out.writeString(nodeId);
taskResourceUsage.writeTo(out);
}

@Override
public String toString() {
return Strings.toString(MediaTypeRegistry.JSON, this);
}

@Override
public boolean equals(Object obj) {
if (obj == null || obj.getClass() != TaskResourceInfo.class) {
return false;

Check warning on line 211 in libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java

View check run for this annotation

Codecov / codecov/patch

libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java#L211

Added line #L211 was not covered by tests
}
TaskResourceInfo other = (TaskResourceInfo) obj;
return action.equals(other.action)
&& taskId == other.taskId
&& parentTaskId == other.parentTaskId
&& Objects.equals(nodeId, other.nodeId)
&& taskResourceUsage.equals(other.taskResourceUsage);
}

@Override
public int hashCode() {
return Objects.hash(action, taskId, parentTaskId, nodeId, taskResourceUsage);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.opensearch.core.action.ActionListener;
import org.opensearch.core.action.ShardOperationFailedException;
import org.opensearch.core.index.shard.ShardId;
import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo;
import org.opensearch.search.SearchPhaseResult;
import org.opensearch.search.SearchShardTarget;
import org.opensearch.search.internal.AliasFilter;
Expand Down Expand Up @@ -469,6 +470,10 @@ private void onRequestEnd(SearchRequestContext searchRequestContext) {
this.searchRequestContext.getSearchRequestOperationsListener().onRequestEnd(this, searchRequestContext);
}

private void onRequestFailure(SearchRequestContext searchRequestContext) {
this.searchRequestContext.getSearchRequestOperationsListener().onRequestFailure(this, searchRequestContext);
}

private void executePhase(SearchPhase phase) {
Span phaseSpan = tracer.startSpan(SpanCreationContext.server().name("[phase/" + phase.getName() + "]"));
try (final SpanScope scope = tracer.withSpanInScope(phaseSpan)) {
Expand Down Expand Up @@ -507,6 +512,7 @@ ShardSearchFailure[] buildShardFailures() {
private void onShardFailure(final int shardIndex, @Nullable SearchShardTarget shard, final SearchShardIterator shardIt, Exception e) {
// we always add the shard failure for a specific shard instance
// we do make sure to clean it on a successful response from a shard
setPhaseResourceUsages();
onShardFailure(shardIndex, shard, e);
SearchShardTarget nextShard = FailAwareWeightedRouting.getInstance()
.findNext(shardIt, clusterState, e, () -> totalOps.incrementAndGet());
Expand Down Expand Up @@ -618,9 +624,15 @@ protected void onShardResult(Result result, SearchShardIterator shardIt) {
if (logger.isTraceEnabled()) {
logger.trace("got first-phase result from {}", result != null ? result.getSearchShardTarget() : null);
}
this.setPhaseResourceUsages();
msfroh marked this conversation as resolved.
Show resolved Hide resolved
results.consumeResult(result, () -> onShardResultConsumed(result, shardIt));
}

public void setPhaseResourceUsages() {
TaskResourceInfo taskResourceUsage = searchRequestContext.getTaskResourceUsageSupplier().get();
searchRequestContext.recordPhaseResourceUsage(taskResourceUsage);
}

private void onShardResultConsumed(Result result, SearchShardIterator shardIt) {
successfulOps.incrementAndGet();
// clean a previous error on this shard group (note, this code will be serialized on the same shardIndex value level
Expand Down Expand Up @@ -751,6 +763,7 @@ public void sendSearchResponse(InternalSearchResponse internalSearchResponse, At

@Override
public final void onPhaseFailure(SearchPhase phase, String msg, Throwable cause) {
setPhaseResourceUsages();
if (currentPhaseHasLifecycle) {
this.searchRequestContext.getSearchRequestOperationsListener().onPhaseFailure(this, cause);
}
Expand Down Expand Up @@ -780,6 +793,7 @@ private void raisePhaseFailure(SearchPhaseExecutionException exception) {
});
}
Releasables.close(releasables);
onRequestFailure(searchRequestContext);
listener.onFailure(exception);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ private void executeFetch(
public void innerOnResponse(FetchSearchResult result) {
try {
progressListener.notifyFetchResult(shardIndex);
context.setPhaseResourceUsages();
ansjcy marked this conversation as resolved.
Show resolved Hide resolved
counter.onResult(result);
} catch (Exception e) {
context.onPhaseFailure(FetchSearchPhase.this, "", e);
Expand All @@ -254,6 +255,7 @@ public void onFailure(Exception e) {
e
);
progressListener.notifyFetchFailure(shardIndex, shardTarget, e);
context.setPhaseResourceUsages();
counter.onFailure(shardIndex, shardTarget, e);
} finally {
// the search context might not be cleared on the node where the fetch was executed for example
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,9 @@ default void sendReleaseSearchContext(
* Registers a {@link Releasable} that will be closed when the search request finishes or fails.
*/
void addReleasable(Releasable releasable);

/**
* Set the resource usage info for this phase
*/
void setPhaseResourceUsages();
}
Loading
Loading