diff --git a/.github/workflows/dco.yml b/.github/workflows/dco.yml index 9580d510fd108..ef842bb405d60 100644 --- a/.github/workflows/dco.yml +++ b/.github/workflows/dco.yml @@ -9,7 +9,7 @@ jobs: steps: - name: Get PR Commits id: 'get-pr-commits' - uses: tim-actions/get-pr-commits@v1.1.0 + uses: tim-actions/get-pr-commits@v1.3.1 with: token: ${{ secrets.GITHUB_TOKEN }} - name: DCO Check diff --git a/CHANGELOG.md b/CHANGELOG.md index 942f1ffd86a04..5dfa91e672999 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Move Remote Store Migration from DocRep to GA and modify remote migration settings name ([#14100](https://github.com/opensearch-project/OpenSearch/pull/14100)) - [Remote State] Add async remote state deletion task running on an interval, configurable by a setting ([#13995](https://github.com/opensearch-project/OpenSearch/pull/13995)) - Add remote routing table for remote state publication with experimental feature flag ([#13304](https://github.com/opensearch-project/OpenSearch/pull/13304)) +- Add support for query level resource usage tracking ([#13172](https://github.com/opensearch-project/OpenSearch/pull/13172)) +- [Query Insights] Add cpu and memory metrics to top n queries ([#13739](https://github.com/opensearch-project/OpenSearch/pull/13739)) - Derived field object type support ([#13720](https://github.com/opensearch-project/OpenSearch/pull/13720)) ### Dependencies @@ -40,11 +42,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `org.apache.xmlbeans:xmlbeans` from 5.2.0 to 5.2.1 ([#13839](https://github.com/opensearch-project/OpenSearch/pull/13839)) - Bump `actions/checkout` from 3 to 4 ([#13935](https://github.com/opensearch-project/OpenSearch/pull/13935)) - Bump `com.netflix.nebula.ospackage-base` from 11.9.0 to 11.9.1 ([#13933](https://github.com/opensearch-project/OpenSearch/pull/13933)) +- Bump `com.azure:azure-core-http-netty` from 1.12.8 to 1.15.1 ([#14128](https://github.com/opensearch-project/OpenSearch/pull/14128)) +- Bump `tim-actions/get-pr-commits` from 1.1.0 to 1.3.1 ([#14126](https://github.com/opensearch-project/OpenSearch/pull/14126)) ### Changed - Add ability for Boolean and date field queries to run when only doc_values are enabled ([#11650](https://github.com/opensearch-project/OpenSearch/pull/11650)) - Refactor implementations of query phase searcher, allow QueryCollectorContext to have zero collectors ([#13481](https://github.com/opensearch-project/OpenSearch/pull/13481)) - Adds support to inject telemetry instances to plugins ([#13636](https://github.com/opensearch-project/OpenSearch/pull/13636)) +- Adds support to provide tags with value in Gauge metric. ([#13994](https://github.com/opensearch-project/OpenSearch/pull/13994)) - Move cache removal notifications outside lru lock ([#14017](https://github.com/opensearch-project/OpenSearch/pull/14017)) ### Deprecated diff --git a/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/ResourceUsageInfo.java b/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/ResourceUsageInfo.java index a278b61894a65..e7b51c3389b52 100644 --- a/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/ResourceUsageInfo.java +++ b/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/ResourceUsageInfo.java @@ -104,6 +104,10 @@ public long getTotalValue() { return endValue.get() - startValue; } + public long getStartValue() { + return startValue; + } + @Override public String toString() { return String.valueOf(getTotalValue()); diff --git a/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java b/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java new file mode 100644 index 0000000000000..373cdbfa7e9a1 --- /dev/null +++ b/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java @@ -0,0 +1,225 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.core.tasks.resourcetracker; + +import org.opensearch.common.annotation.PublicApi; +import org.opensearch.core.ParseField; +import org.opensearch.core.common.Strings; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ConstructingObjectParser; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.ToXContentObject; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Objects; + +import static org.opensearch.core.xcontent.ConstructingObjectParser.constructorArg; + +/** + * Task resource usage information with minimal information about the task + *

+ * Writeable TaskResourceInfo objects are used to represent resource usage + * information of running tasks, which can be propagated to coordinator node + * to infer query-level resource usage + * + * @opensearch.api + */ +@PublicApi(since = "2.15.0") +public class TaskResourceInfo implements Writeable, ToXContentObject { + private final String action; + private final long taskId; + private final long parentTaskId; + private final String nodeId; + private final TaskResourceUsage taskResourceUsage; + + private static final ParseField ACTION = new ParseField("action"); + private static final ParseField TASK_ID = new ParseField("taskId"); + private static final ParseField PARENT_TASK_ID = new ParseField("parentTaskId"); + private static final ParseField NODE_ID = new ParseField("nodeId"); + private static final ParseField TASK_RESOURCE_USAGE = new ParseField("taskResourceUsage"); + + public TaskResourceInfo( + final String action, + final long taskId, + final long parentTaskId, + final String nodeId, + final TaskResourceUsage taskResourceUsage + ) { + this.action = action; + this.taskId = taskId; + this.parentTaskId = parentTaskId; + this.nodeId = nodeId; + this.taskResourceUsage = taskResourceUsage; + } + + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "task_resource_info", + a -> new Builder().setAction((String) a[0]) + .setTaskId((Long) a[1]) + .setParentTaskId((Long) a[2]) + .setNodeId((String) a[3]) + .setTaskResourceUsage((TaskResourceUsage) a[4]) + .build() + ); + + static { + PARSER.declareString(constructorArg(), ACTION); + PARSER.declareLong(constructorArg(), TASK_ID); + PARSER.declareLong(constructorArg(), PARENT_TASK_ID); + PARSER.declareString(constructorArg(), NODE_ID); + PARSER.declareObject(constructorArg(), TaskResourceUsage.PARSER, TASK_RESOURCE_USAGE); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(ACTION.getPreferredName(), this.action); + builder.field(TASK_ID.getPreferredName(), this.taskId); + builder.field(PARENT_TASK_ID.getPreferredName(), this.parentTaskId); + builder.field(NODE_ID.getPreferredName(), this.nodeId); + builder.startObject(TASK_RESOURCE_USAGE.getPreferredName()); + this.taskResourceUsage.toXContent(builder, params); + builder.endObject(); + builder.endObject(); + return builder; + } + + /** + * Builder for {@link TaskResourceInfo} + */ + public static class Builder { + private TaskResourceUsage taskResourceUsage; + private String action; + private long taskId; + private long parentTaskId; + private String nodeId; + + public Builder setTaskResourceUsage(final TaskResourceUsage taskResourceUsage) { + this.taskResourceUsage = taskResourceUsage; + return this; + } + + public Builder setAction(final String action) { + this.action = action; + return this; + } + + public Builder setTaskId(final long taskId) { + this.taskId = taskId; + return this; + } + + public Builder setParentTaskId(final long parentTaskId) { + this.parentTaskId = parentTaskId; + return this; + } + + public Builder setNodeId(final String nodeId) { + this.nodeId = nodeId; + return this; + } + + public TaskResourceInfo build() { + return new TaskResourceInfo(action, taskId, parentTaskId, nodeId, taskResourceUsage); + } + } + + /** + * Read task info from a stream. + * + * @param in StreamInput to read + * @return {@link TaskResourceInfo} + * @throws IOException IOException + */ + public static TaskResourceInfo readFromStream(StreamInput in) throws IOException { + return new TaskResourceInfo.Builder().setAction(in.readString()) + .setTaskId(in.readLong()) + .setParentTaskId(in.readLong()) + .setNodeId(in.readString()) + .setTaskResourceUsage(TaskResourceUsage.readFromStream(in)) + .build(); + } + + /** + * Get TaskResourceUsage + * + * @return taskResourceUsage + */ + public TaskResourceUsage getTaskResourceUsage() { + return taskResourceUsage; + } + + /** + * Get parent task id + * + * @return parent task id + */ + public long getParentTaskId() { + return parentTaskId; + } + + /** + * Get task id + * @return task id + */ + public long getTaskId() { + return taskId; + } + + /** + * Get node id + * @return node id + */ + public String getNodeId() { + return nodeId; + } + + /** + * Get task action + * @return task action + */ + public String getAction() { + return action; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(action); + out.writeLong(taskId); + out.writeLong(parentTaskId); + out.writeString(nodeId); + taskResourceUsage.writeTo(out); + } + + @Override + public String toString() { + return Strings.toString(MediaTypeRegistry.JSON, this); + } + + @Override + public boolean equals(Object obj) { + if (obj == null || obj.getClass() != TaskResourceInfo.class) { + return false; + } + TaskResourceInfo other = (TaskResourceInfo) obj; + return action.equals(other.action) + && taskId == other.taskId + && parentTaskId == other.parentTaskId + && Objects.equals(nodeId, other.nodeId) + && taskResourceUsage.equals(other.taskResourceUsage); + } + + @Override + public int hashCode() { + return Objects.hash(action, taskId, parentTaskId, nodeId, taskResourceUsage); + } +} diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/DefaultMetricsRegistry.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/DefaultMetricsRegistry.java index c861c21f89fc5..bcf5c163cb91f 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/DefaultMetricsRegistry.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/DefaultMetricsRegistry.java @@ -48,6 +48,11 @@ public Closeable createGauge(String name, String description, String unit, Suppl return metricsTelemetry.createGauge(name, description, unit, valueProvider, tags); } + @Override + public Closeable createGauge(String name, String description, String unit, Supplier value) { + return metricsTelemetry.createGauge(name, description, unit, value); + } + @Override public void close() throws IOException { metricsTelemetry.close(); diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/MetricsRegistry.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/MetricsRegistry.java index 3ab3dcf82c7a7..3dc212b1341cc 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/MetricsRegistry.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/MetricsRegistry.java @@ -63,4 +63,16 @@ public interface MetricsRegistry extends Closeable { */ Closeable createGauge(String name, String description, String unit, Supplier valueProvider, Tags tags); + /** + * Creates the Observable Gauge type of Metric. Where the value provider will be called at a certain frequency + * to capture the value. + * + * @param name name of the observable gauge. + * @param description any description about the metric. + * @param unit unit of the metric. + * @param value value provider. + * @return closeable to dispose/close the Gauge metric. + */ + Closeable createGauge(String name, String description, String unit, Supplier value); + } diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/TaggedMeasurement.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/TaggedMeasurement.java new file mode 100644 index 0000000000000..707f2c79c62f2 --- /dev/null +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/TaggedMeasurement.java @@ -0,0 +1,53 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.telemetry.metrics; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.telemetry.metrics.tags.Tags; + +/** + * Observable Measurement for the Asynchronous instruments. + * @opensearch.experimental + */ +@ExperimentalApi +public final class TaggedMeasurement { + private final Double value; + private final Tags tags; + + /** + * Factory method to create the {@link TaggedMeasurement} object. + * @param value value. + * @param tags tags to be added per value. + * @return tagged measurement TaggedMeasurement + */ + public static TaggedMeasurement create(double value, Tags tags) { + return new TaggedMeasurement(value, tags); + } + + private TaggedMeasurement(double value, Tags tags) { + this.value = value; + this.tags = tags; + } + + /** + * Returns the value. + * @return value + */ + public Double getValue() { + return value; + } + + /** + * Returns the tags. + * @return tags + */ + public Tags getTags() { + return tags; + } +} diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/noop/NoopMetricsRegistry.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/noop/NoopMetricsRegistry.java index 9a913d25e872d..7bec136c42ba7 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/noop/NoopMetricsRegistry.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/noop/NoopMetricsRegistry.java @@ -12,6 +12,7 @@ import org.opensearch.telemetry.metrics.Counter; import org.opensearch.telemetry.metrics.Histogram; import org.opensearch.telemetry.metrics.MetricsRegistry; +import org.opensearch.telemetry.metrics.TaggedMeasurement; import org.opensearch.telemetry.metrics.tags.Tags; import java.io.Closeable; @@ -52,6 +53,11 @@ public Closeable createGauge(String name, String description, String unit, Suppl return () -> {}; } + @Override + public Closeable createGauge(String name, String description, String unit, Supplier value) { + return () -> {}; + } + @Override public void close() throws IOException { diff --git a/libs/telemetry/src/test/java/org/opensearch/telemetry/metrics/DefaultMetricsRegistryTests.java b/libs/telemetry/src/test/java/org/opensearch/telemetry/metrics/DefaultMetricsRegistryTests.java index 872f697ade09e..e1506eecff6e9 100644 --- a/libs/telemetry/src/test/java/org/opensearch/telemetry/metrics/DefaultMetricsRegistryTests.java +++ b/libs/telemetry/src/test/java/org/opensearch/telemetry/metrics/DefaultMetricsRegistryTests.java @@ -79,4 +79,19 @@ public void testGauge() { assertSame(mockCloseable, closeable); } + @SuppressWarnings("unchecked") + public void testGaugeWithValueAndTagSupplier() { + Closeable mockCloseable = mock(Closeable.class); + when(defaultMeterRegistry.createGauge(any(String.class), any(String.class), any(String.class), any(Supplier.class))).thenReturn( + mockCloseable + ); + Closeable closeable = defaultMeterRegistry.createGauge( + "org.opensearch.telemetry.metrics.DefaultMeterRegistryTests.testObservableGauge", + "test observable gauge", + "ms", + () -> TaggedMeasurement.create(1.0, Tags.EMPTY) + ); + assertSame(mockCloseable, closeable); + } + } diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/QueryInsightsPlugin.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/QueryInsightsPlugin.java index 22831c3e0f8ba..bba676436c39a 100644 --- a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/QueryInsightsPlugin.java +++ b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/QueryInsightsPlugin.java @@ -111,7 +111,15 @@ public List> getSettings() { QueryInsightsSettings.TOP_N_LATENCY_QUERIES_ENABLED, QueryInsightsSettings.TOP_N_LATENCY_QUERIES_SIZE, QueryInsightsSettings.TOP_N_LATENCY_QUERIES_WINDOW_SIZE, - QueryInsightsSettings.TOP_N_LATENCY_EXPORTER_SETTINGS + QueryInsightsSettings.TOP_N_LATENCY_EXPORTER_SETTINGS, + QueryInsightsSettings.TOP_N_CPU_QUERIES_ENABLED, + QueryInsightsSettings.TOP_N_CPU_QUERIES_SIZE, + QueryInsightsSettings.TOP_N_CPU_QUERIES_WINDOW_SIZE, + QueryInsightsSettings.TOP_N_CPU_EXPORTER_SETTINGS, + QueryInsightsSettings.TOP_N_MEMORY_QUERIES_ENABLED, + QueryInsightsSettings.TOP_N_MEMORY_QUERIES_SIZE, + QueryInsightsSettings.TOP_N_MEMORY_QUERIES_WINDOW_SIZE, + QueryInsightsSettings.TOP_N_MEMORY_EXPORTER_SETTINGS ); } } diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/exporter/QueryInsightsExporterFactory.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/exporter/QueryInsightsExporterFactory.java index 7324590c9f582..016911761a3d0 100644 --- a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/exporter/QueryInsightsExporterFactory.java +++ b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/exporter/QueryInsightsExporterFactory.java @@ -19,7 +19,7 @@ import java.util.Locale; import java.util.Set; -import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.DEFAULT_TOP_N_LATENCY_QUERIES_INDEX_PATTERN; +import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.DEFAULT_TOP_N_QUERIES_INDEX_PATTERN; import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.DEFAULT_TOP_QUERIES_EXPORTER_TYPE; import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.EXPORTER_TYPE; import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.EXPORT_INDEX; @@ -71,7 +71,7 @@ public void validateExporterConfig(final Settings settings) throws IllegalArgume } switch (type) { case LOCAL_INDEX: - final String indexPattern = settings.get(EXPORT_INDEX, DEFAULT_TOP_N_LATENCY_QUERIES_INDEX_PATTERN); + final String indexPattern = settings.get(EXPORT_INDEX, DEFAULT_TOP_N_QUERIES_INDEX_PATTERN); if (indexPattern.length() == 0) { throw new IllegalArgumentException("Empty index pattern configured for the exporter"); } diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListener.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListener.java index cad2fe374f1b6..a1f810ad5987c 100644 --- a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListener.java +++ b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListener.java @@ -14,8 +14,10 @@ import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchRequestContext; import org.opensearch.action.search.SearchRequestOperationsListener; +import org.opensearch.action.search.SearchTask; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.inject.Inject; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; import org.opensearch.core.xcontent.ToXContent; import org.opensearch.plugin.insights.core.service.QueryInsightsService; import org.opensearch.plugin.insights.rules.model.Attribute; @@ -25,13 +27,14 @@ import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.concurrent.TimeUnit; -import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.TOP_N_LATENCY_QUERIES_ENABLED; -import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.TOP_N_LATENCY_QUERIES_SIZE; -import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.TOP_N_LATENCY_QUERIES_WINDOW_SIZE; +import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.getTopNEnabledSetting; +import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.getTopNSizeSetting; +import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.getTopNWindowSizeSetting; /** * The listener for query insights services. @@ -46,6 +49,7 @@ public final class QueryInsightsListener extends SearchRequestOperationsListener private static final Logger log = LogManager.getLogger(QueryInsightsListener.class); private final QueryInsightsService queryInsightsService; + private final ClusterService clusterService; /** * Constructor for QueryInsightsListener @@ -55,26 +59,32 @@ public final class QueryInsightsListener extends SearchRequestOperationsListener */ @Inject public QueryInsightsListener(final ClusterService clusterService, final QueryInsightsService queryInsightsService) { + this.clusterService = clusterService; this.queryInsightsService = queryInsightsService; - clusterService.getClusterSettings() - .addSettingsUpdateConsumer(TOP_N_LATENCY_QUERIES_ENABLED, v -> this.setEnableTopQueries(MetricType.LATENCY, v)); - clusterService.getClusterSettings() - .addSettingsUpdateConsumer( - TOP_N_LATENCY_QUERIES_SIZE, - v -> this.queryInsightsService.getTopQueriesService(MetricType.LATENCY).setTopNSize(v), - v -> this.queryInsightsService.getTopQueriesService(MetricType.LATENCY).validateTopNSize(v) - ); - clusterService.getClusterSettings() - .addSettingsUpdateConsumer( - TOP_N_LATENCY_QUERIES_WINDOW_SIZE, - v -> this.queryInsightsService.getTopQueriesService(MetricType.LATENCY).setWindowSize(v), - v -> this.queryInsightsService.getTopQueriesService(MetricType.LATENCY).validateWindowSize(v) - ); - this.setEnableTopQueries(MetricType.LATENCY, clusterService.getClusterSettings().get(TOP_N_LATENCY_QUERIES_ENABLED)); - this.queryInsightsService.getTopQueriesService(MetricType.LATENCY) - .setTopNSize(clusterService.getClusterSettings().get(TOP_N_LATENCY_QUERIES_SIZE)); - this.queryInsightsService.getTopQueriesService(MetricType.LATENCY) - .setWindowSize(clusterService.getClusterSettings().get(TOP_N_LATENCY_QUERIES_WINDOW_SIZE)); + // Setting endpoints set up for top n queries, including enabling top n queries, window size and top n size + // Expected metricTypes are Latency, CPU and Memory. + for (MetricType type : MetricType.allMetricTypes()) { + clusterService.getClusterSettings() + .addSettingsUpdateConsumer(getTopNEnabledSetting(type), v -> this.setEnableTopQueries(type, v)); + clusterService.getClusterSettings() + .addSettingsUpdateConsumer( + getTopNSizeSetting(type), + v -> this.queryInsightsService.setTopNSize(type, v), + v -> this.queryInsightsService.validateTopNSize(type, v) + ); + clusterService.getClusterSettings() + .addSettingsUpdateConsumer( + getTopNWindowSizeSetting(type), + v -> this.queryInsightsService.setWindowSize(type, v), + v -> this.queryInsightsService.validateWindowSize(type, v) + ); + + this.setEnableTopQueries(type, clusterService.getClusterSettings().get(getTopNEnabledSetting(type))); + this.queryInsightsService.validateTopNSize(type, clusterService.getClusterSettings().get(getTopNSizeSetting(type))); + this.queryInsightsService.setTopNSize(type, clusterService.getClusterSettings().get(getTopNSizeSetting(type))); + this.queryInsightsService.validateWindowSize(type, clusterService.getClusterSettings().get(getTopNWindowSizeSetting(type))); + this.queryInsightsService.setWindowSize(type, clusterService.getClusterSettings().get(getTopNWindowSizeSetting(type))); + } } /** @@ -124,6 +134,27 @@ public void onRequestStart(SearchRequestContext searchRequestContext) {} @Override public void onRequestEnd(final SearchPhaseContext context, final SearchRequestContext searchRequestContext) { + constructSearchQueryRecord(context, searchRequestContext); + } + + @Override + public void onRequestFailure(final SearchPhaseContext context, final SearchRequestContext searchRequestContext) { + constructSearchQueryRecord(context, searchRequestContext); + } + + private void constructSearchQueryRecord(final SearchPhaseContext context, final SearchRequestContext searchRequestContext) { + SearchTask searchTask = context.getTask(); + List tasksResourceUsages = searchRequestContext.getPhaseResourceUsage(); + tasksResourceUsages.add( + new TaskResourceInfo( + searchTask.getAction(), + searchTask.getId(), + searchTask.getParentTaskId().getId(), + clusterService.localNode().getId(), + searchTask.getTotalResourceStats() + ) + ); + final SearchRequest request = context.getRequest(); try { Map measurements = new HashMap<>(); @@ -133,12 +164,25 @@ public void onRequestEnd(final SearchPhaseContext context, final SearchRequestCo TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - searchRequestContext.getAbsoluteStartNanos()) ); } + if (queryInsightsService.isCollectionEnabled(MetricType.CPU)) { + measurements.put( + MetricType.CPU, + tasksResourceUsages.stream().map(a -> a.getTaskResourceUsage().getCpuTimeInNanos()).mapToLong(Long::longValue).sum() + ); + } + if (queryInsightsService.isCollectionEnabled(MetricType.MEMORY)) { + measurements.put( + MetricType.MEMORY, + tasksResourceUsages.stream().map(a -> a.getTaskResourceUsage().getMemoryInBytes()).mapToLong(Long::longValue).sum() + ); + } Map attributes = new HashMap<>(); attributes.put(Attribute.SEARCH_TYPE, request.searchType().toString().toLowerCase(Locale.ROOT)); attributes.put(Attribute.SOURCE, request.source().toString(FORMAT_PARAMS)); attributes.put(Attribute.TOTAL_SHARDS, context.getNumShards()); attributes.put(Attribute.INDICES, request.indices()); attributes.put(Attribute.PHASE_LATENCY_MAP, searchRequestContext.phaseTookMap()); + attributes.put(Attribute.TASK_RESOURCE_USAGES, tasksResourceUsages); Map labels = new HashMap<>(); // Retrieve user provided label if exists @@ -154,4 +198,5 @@ public void onRequestEnd(final SearchPhaseContext context, final SearchRequestCo log.error(String.format(Locale.ROOT, "fail to ingest query insight data, error: %s", e)); } } + } diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/service/QueryInsightsService.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/service/QueryInsightsService.java index a83bb2094f165..c63430a1a726c 100644 --- a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/service/QueryInsightsService.java +++ b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/service/QueryInsightsService.java @@ -12,6 +12,8 @@ import org.opensearch.common.inject.Inject; import org.opensearch.common.lifecycle.AbstractLifecycleComponent; import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; import org.opensearch.plugin.insights.core.exporter.QueryInsightsExporterFactory; import org.opensearch.plugin.insights.rules.model.MetricType; import org.opensearch.plugin.insights.rules.model.SearchQueryRecord; @@ -27,7 +29,7 @@ import java.util.Map; import java.util.concurrent.LinkedBlockingQueue; -import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.TOP_N_LATENCY_EXPORTER_SETTINGS; +import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.getExporterSettings; /** * Service responsible for gathering, analyzing, storing and exporting @@ -86,11 +88,13 @@ public QueryInsightsService(final ClusterSettings clusterSettings, final ThreadP enableCollect.put(metricType, false); topQueriesServices.put(metricType, new TopQueriesService(metricType, threadPool, queryInsightsExporterFactory)); } - clusterSettings.addSettingsUpdateConsumer( - TOP_N_LATENCY_EXPORTER_SETTINGS, - (settings -> getTopQueriesService(MetricType.LATENCY).setExporter(settings)), - (settings -> getTopQueriesService(MetricType.LATENCY).validateExporterConfig(settings)) - ); + for (MetricType type : MetricType.allMetricTypes()) { + clusterSettings.addSettingsUpdateConsumer( + getExporterSettings(type), + (settings -> setExporter(type, settings)), + (settings -> validateExporterConfig(type, settings)) + ); + } } /** @@ -177,6 +181,78 @@ public boolean isEnabled() { return false; } + /** + * Validate the window size config for a metricType + * + * @param type {@link MetricType} + * @param windowSize {@link TimeValue} + */ + public void validateWindowSize(final MetricType type, final TimeValue windowSize) { + if (topQueriesServices.containsKey(type)) { + topQueriesServices.get(type).validateWindowSize(windowSize); + } + } + + /** + * Set window size for a metricType + * + * @param type {@link MetricType} + * @param windowSize {@link TimeValue} + */ + public void setWindowSize(final MetricType type, final TimeValue windowSize) { + if (topQueriesServices.containsKey(type)) { + topQueriesServices.get(type).setWindowSize(windowSize); + } + } + + /** + * Validate the top n size config for a metricType + * + * @param type {@link MetricType} + * @param topNSize top n size + */ + public void validateTopNSize(final MetricType type, final int topNSize) { + if (topQueriesServices.containsKey(type)) { + topQueriesServices.get(type).validateTopNSize(topNSize); + } + } + + /** + * Set the top n size config for a metricType + * + * @param type {@link MetricType} + * @param topNSize top n size + */ + public void setTopNSize(final MetricType type, final int topNSize) { + if (topQueriesServices.containsKey(type)) { + topQueriesServices.get(type).setTopNSize(topNSize); + } + } + + /** + * Set the exporter config for a metricType + * + * @param type {@link MetricType} + * @param settings exporter settings + */ + public void setExporter(final MetricType type, final Settings settings) { + if (topQueriesServices.containsKey(type)) { + topQueriesServices.get(type).setExporter(settings); + } + } + + /** + * Validate the exporter config for a metricType + * + * @param type {@link MetricType} + * @param settings exporter settings + */ + public void validateExporterConfig(final MetricType type, final Settings settings) { + if (topQueriesServices.containsKey(type)) { + topQueriesServices.get(type).validateExporterConfig(settings); + } + } + @Override protected void doStart() { if (isEnabled()) { diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/service/TopQueriesService.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/service/TopQueriesService.java index ff90edf1ec33d..c21b89be4dcca 100644 --- a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/service/TopQueriesService.java +++ b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/service/TopQueriesService.java @@ -35,7 +35,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.DEFAULT_TOP_N_LATENCY_QUERIES_INDEX_PATTERN; +import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.DEFAULT_TOP_N_QUERIES_INDEX_PATTERN; import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.DEFAULT_TOP_QUERIES_EXPORTER_TYPE; import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.EXPORTER_TYPE; import static org.opensearch.plugin.insights.settings.QueryInsightsSettings.EXPORT_INDEX; @@ -218,10 +218,7 @@ public void setExporter(final Settings settings) { if (settings.get(EXPORTER_TYPE) != null) { SinkType expectedType = SinkType.parse(settings.get(EXPORTER_TYPE, DEFAULT_TOP_QUERIES_EXPORTER_TYPE)); if (exporter != null && expectedType == SinkType.getSinkTypeFromExporter(exporter)) { - queryInsightsExporterFactory.updateExporter( - exporter, - settings.get(EXPORT_INDEX, DEFAULT_TOP_N_LATENCY_QUERIES_INDEX_PATTERN) - ); + queryInsightsExporterFactory.updateExporter(exporter, settings.get(EXPORT_INDEX, DEFAULT_TOP_N_QUERIES_INDEX_PATTERN)); } else { try { queryInsightsExporterFactory.closeExporter(this.exporter); @@ -230,7 +227,7 @@ public void setExporter(final Settings settings) { } this.exporter = queryInsightsExporterFactory.createExporter( SinkType.parse(settings.get(EXPORTER_TYPE, DEFAULT_TOP_QUERIES_EXPORTER_TYPE)), - settings.get(EXPORT_INDEX, DEFAULT_TOP_N_LATENCY_QUERIES_INDEX_PATTERN) + settings.get(EXPORT_INDEX, DEFAULT_TOP_N_QUERIES_INDEX_PATTERN) ); } } else { diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/Attribute.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/Attribute.java index 7ee4883c54023..dcdb085fdc6fa 100644 --- a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/Attribute.java +++ b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/Attribute.java @@ -44,6 +44,10 @@ public enum Attribute { * The node id for this request */ NODE_ID, + /** + * Tasks level resource usages in this request + */ + TASK_RESOURCE_USAGES, /** * Custom search request labels */ diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/MetricType.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/MetricType.java index cdd090fbf4804..4694c757f4ef2 100644 --- a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/MetricType.java +++ b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/MetricType.java @@ -35,7 +35,7 @@ public enum MetricType implements Comparator { /** * JVM heap usage metric type */ - JVM; + MEMORY; /** * Read a MetricType from a StreamInput @@ -93,10 +93,9 @@ public static Set allMetricTypes() { public int compare(final Number a, final Number b) { switch (this) { case LATENCY: - return Long.compare(a.longValue(), b.longValue()); - case JVM: case CPU: - return Double.compare(a.doubleValue(), b.doubleValue()); + case MEMORY: + return Long.compare(a.longValue(), b.longValue()); } return -1; } @@ -110,10 +109,9 @@ public int compare(final Number a, final Number b) { Number parseValue(final Object o) { switch (this) { case LATENCY: - return (Long) o; - case JVM: case CPU: - return (Double) o; + case MEMORY: + return (Long) o; default: return (Number) o; } diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/transport/top_queries/TransportTopQueriesAction.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/transport/top_queries/TransportTopQueriesAction.java index 4b4a277263c0f..20465102d58ac 100644 --- a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/transport/top_queries/TransportTopQueriesAction.java +++ b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/transport/top_queries/TransportTopQueriesAction.java @@ -8,7 +8,6 @@ package org.opensearch.plugin.insights.rules.transport.top_queries; -import org.opensearch.OpenSearchException; import org.opensearch.action.FailedNodeException; import org.opensearch.action.support.ActionFilters; import org.opensearch.action.support.nodes.BaseNodeRequest; @@ -22,14 +21,12 @@ import org.opensearch.plugin.insights.rules.action.top_queries.TopQueriesAction; import org.opensearch.plugin.insights.rules.action.top_queries.TopQueriesRequest; import org.opensearch.plugin.insights.rules.action.top_queries.TopQueriesResponse; -import org.opensearch.plugin.insights.rules.model.MetricType; import org.opensearch.plugin.insights.settings.QueryInsightsSettings; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; import java.io.IOException; import java.util.List; -import java.util.Locale; /** * Transport action for cluster/node level top queries information. @@ -81,17 +78,18 @@ protected TopQueriesResponse newResponse( final List responses, final List failures ) { - if (topQueriesRequest.getMetricType() == MetricType.LATENCY) { - return new TopQueriesResponse( - clusterService.getClusterName(), - responses, - failures, - clusterService.getClusterSettings().get(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_SIZE), - MetricType.LATENCY - ); - } else { - throw new OpenSearchException(String.format(Locale.ROOT, "invalid metric type %s", topQueriesRequest.getMetricType())); + int size; + switch (topQueriesRequest.getMetricType()) { + case CPU: + size = clusterService.getClusterSettings().get(QueryInsightsSettings.TOP_N_CPU_QUERIES_SIZE); + break; + case MEMORY: + size = clusterService.getClusterSettings().get(QueryInsightsSettings.TOP_N_MEMORY_QUERIES_SIZE); + break; + default: + size = clusterService.getClusterSettings().get(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_SIZE); } + return new TopQueriesResponse(clusterService.getClusterName(), responses, failures, size, topQueriesRequest.getMetricType()); } @Override @@ -107,15 +105,10 @@ protected TopQueries newNodeResponse(final StreamInput in) throws IOException { @Override protected TopQueries nodeOperation(final NodeRequest nodeRequest) { final TopQueriesRequest request = nodeRequest.request; - if (request.getMetricType() == MetricType.LATENCY) { - return new TopQueries( - clusterService.localNode(), - queryInsightsService.getTopQueriesService(MetricType.LATENCY).getTopQueriesRecords(true) - ); - } else { - throw new OpenSearchException(String.format(Locale.ROOT, "invalid metric type %s", request.getMetricType())); - } - + return new TopQueries( + clusterService.localNode(), + queryInsightsService.getTopQueriesService(request.getMetricType()).getTopQueriesRecords(true) + ); } /** diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/settings/QueryInsightsSettings.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/settings/QueryInsightsSettings.java index b2e01062e334c..25309b5721792 100644 --- a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/settings/QueryInsightsSettings.java +++ b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/settings/QueryInsightsSettings.java @@ -12,6 +12,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.plugin.insights.core.exporter.SinkType; +import org.opensearch.plugin.insights.rules.model.MetricType; import java.util.Arrays; import java.util.HashSet; @@ -81,6 +82,10 @@ public class QueryInsightsSettings { public static final String TOP_N_QUERIES_SETTING_PREFIX = "search.insights.top_queries"; /** Default prefix for top N queries by latency feature */ public static final String TOP_N_LATENCY_QUERIES_PREFIX = TOP_N_QUERIES_SETTING_PREFIX + ".latency"; + /** Default prefix for top N queries by cpu feature */ + public static final String TOP_N_CPU_QUERIES_PREFIX = TOP_N_QUERIES_SETTING_PREFIX + ".cpu"; + /** Default prefix for top N queries by memory feature */ + public static final String TOP_N_MEMORY_QUERIES_PREFIX = TOP_N_QUERIES_SETTING_PREFIX + ".memory"; /** * Boolean setting for enabling top queries by latency. */ @@ -111,6 +116,66 @@ public class QueryInsightsSettings { Setting.Property.Dynamic ); + /** + * Boolean setting for enabling top queries by cpu. + */ + public static final Setting TOP_N_CPU_QUERIES_ENABLED = Setting.boolSetting( + TOP_N_CPU_QUERIES_PREFIX + ".enabled", + false, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Int setting to define the top n size for top queries by cpu. + */ + public static final Setting TOP_N_CPU_QUERIES_SIZE = Setting.intSetting( + TOP_N_CPU_QUERIES_PREFIX + ".top_n_size", + DEFAULT_TOP_N_SIZE, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Time setting to define the window size in seconds for top queries by cpu. + */ + public static final Setting TOP_N_CPU_QUERIES_WINDOW_SIZE = Setting.positiveTimeSetting( + TOP_N_CPU_QUERIES_PREFIX + ".window_size", + DEFAULT_WINDOW_SIZE, + Setting.Property.NodeScope, + Setting.Property.Dynamic + ); + + /** + * Boolean setting for enabling top queries by memory. + */ + public static final Setting TOP_N_MEMORY_QUERIES_ENABLED = Setting.boolSetting( + TOP_N_MEMORY_QUERIES_PREFIX + ".enabled", + false, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Int setting to define the top n size for top queries by memory. + */ + public static final Setting TOP_N_MEMORY_QUERIES_SIZE = Setting.intSetting( + TOP_N_MEMORY_QUERIES_PREFIX + ".top_n_size", + DEFAULT_TOP_N_SIZE, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Time setting to define the window size in seconds for top queries by memory. + */ + public static final Setting TOP_N_MEMORY_QUERIES_WINDOW_SIZE = Setting.positiveTimeSetting( + TOP_N_MEMORY_QUERIES_PREFIX + ".window_size", + DEFAULT_WINDOW_SIZE, + Setting.Property.NodeScope, + Setting.Property.Dynamic + ); + /** * Config key for exporter type */ @@ -125,9 +190,17 @@ public class QueryInsightsSettings { */ private static final String TOP_N_LATENCY_QUERIES_EXPORTER_PREFIX = TOP_N_LATENCY_QUERIES_PREFIX + ".exporter."; /** - * Default index pattern of top n queries by latency + * Prefix for top n queries by cpu exporters + */ + private static final String TOP_N_CPU_QUERIES_EXPORTER_PREFIX = TOP_N_CPU_QUERIES_PREFIX + ".exporter."; + /** + * Prefix for top n queries by memory exporters */ - public static final String DEFAULT_TOP_N_LATENCY_QUERIES_INDEX_PATTERN = "'top_queries_by_latency-'YYYY.MM.dd"; + private static final String TOP_N_MEMORY_QUERIES_EXPORTER_PREFIX = TOP_N_MEMORY_QUERIES_PREFIX + ".exporter."; + /** + * Default index pattern of top n queries + */ + public static final String DEFAULT_TOP_N_QUERIES_INDEX_PATTERN = "'top_queries-'YYYY.MM.dd"; /** * Default exporter type of top queries */ @@ -142,6 +215,88 @@ public class QueryInsightsSettings { Setting.Property.NodeScope ); + /** + * Settings for the exporter of top cpu queries + */ + public static final Setting TOP_N_CPU_EXPORTER_SETTINGS = Setting.groupSetting( + TOP_N_CPU_QUERIES_EXPORTER_PREFIX, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Settings for the exporter of top cpu queries + */ + public static final Setting TOP_N_MEMORY_EXPORTER_SETTINGS = Setting.groupSetting( + TOP_N_MEMORY_QUERIES_EXPORTER_PREFIX, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Get the enabled setting based on type + * @param type MetricType + * @return enabled setting + */ + public static Setting getTopNEnabledSetting(MetricType type) { + switch (type) { + case CPU: + return TOP_N_CPU_QUERIES_ENABLED; + case MEMORY: + return TOP_N_MEMORY_QUERIES_ENABLED; + default: + return TOP_N_LATENCY_QUERIES_ENABLED; + } + } + + /** + * Get the top n size setting based on type + * @param type MetricType + * @return top n size setting + */ + public static Setting getTopNSizeSetting(MetricType type) { + switch (type) { + case CPU: + return TOP_N_CPU_QUERIES_SIZE; + case MEMORY: + return TOP_N_MEMORY_QUERIES_SIZE; + default: + return TOP_N_LATENCY_QUERIES_SIZE; + } + } + + /** + * Get the window size setting based on type + * @param type MetricType + * @return top n queries window size setting + */ + public static Setting getTopNWindowSizeSetting(MetricType type) { + switch (type) { + case CPU: + return TOP_N_CPU_QUERIES_WINDOW_SIZE; + case MEMORY: + return TOP_N_MEMORY_QUERIES_WINDOW_SIZE; + default: + return TOP_N_LATENCY_QUERIES_WINDOW_SIZE; + } + } + + /** + * Get the exporter settings based on type + * @param type MetricType + * @return exporter setting + */ + public static Setting getExporterSettings(MetricType type) { + switch (type) { + case CPU: + return TOP_N_CPU_EXPORTER_SETTINGS; + case MEMORY: + return TOP_N_MEMORY_EXPORTER_SETTINGS; + default: + return TOP_N_LATENCY_EXPORTER_SETTINGS; + } + } + /** * Default constructor */ diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsPluginTests.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsPluginTests.java index 8b8856e3e305c..2efe9085a39ee 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsPluginTests.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsPluginTests.java @@ -47,11 +47,7 @@ public void setup() { Settings.Builder settingsBuilder = Settings.builder(); Settings settings = settingsBuilder.build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_ENABLED); - clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_SIZE); - clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_WINDOW_SIZE); - clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_EXPORTER_SETTINGS); - + QueryInsightsTestUtils.registerAllQueryInsightsSettings(clusterSettings); clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, threadPool); } @@ -61,7 +57,15 @@ public void testGetSettings() { QueryInsightsSettings.TOP_N_LATENCY_QUERIES_ENABLED, QueryInsightsSettings.TOP_N_LATENCY_QUERIES_SIZE, QueryInsightsSettings.TOP_N_LATENCY_QUERIES_WINDOW_SIZE, - QueryInsightsSettings.TOP_N_LATENCY_EXPORTER_SETTINGS + QueryInsightsSettings.TOP_N_LATENCY_EXPORTER_SETTINGS, + QueryInsightsSettings.TOP_N_CPU_QUERIES_ENABLED, + QueryInsightsSettings.TOP_N_CPU_QUERIES_SIZE, + QueryInsightsSettings.TOP_N_CPU_QUERIES_WINDOW_SIZE, + QueryInsightsSettings.TOP_N_CPU_EXPORTER_SETTINGS, + QueryInsightsSettings.TOP_N_MEMORY_QUERIES_ENABLED, + QueryInsightsSettings.TOP_N_MEMORY_QUERIES_SIZE, + QueryInsightsSettings.TOP_N_MEMORY_QUERIES_WINDOW_SIZE, + QueryInsightsSettings.TOP_N_MEMORY_EXPORTER_SETTINGS ), queryInsightsPlugin.getSettings() ); diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsTestUtils.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsTestUtils.java index 870ef5b9c8be9..7fa4e9841c20e 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsTestUtils.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsTestUtils.java @@ -10,6 +10,7 @@ import org.opensearch.action.search.SearchType; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.util.Maps; import org.opensearch.core.xcontent.ToXContent; import org.opensearch.core.xcontent.XContentBuilder; @@ -17,6 +18,7 @@ import org.opensearch.plugin.insights.rules.model.Attribute; import org.opensearch.plugin.insights.rules.model.MetricType; import org.opensearch.plugin.insights.rules.model.SearchQueryRecord; +import org.opensearch.plugin.insights.settings.QueryInsightsSettings; import org.opensearch.test.VersionUtils; import java.io.IOException; @@ -36,7 +38,6 @@ import static org.opensearch.test.OpenSearchTestCase.random; import static org.opensearch.test.OpenSearchTestCase.randomAlphaOfLengthBetween; import static org.opensearch.test.OpenSearchTestCase.randomArray; -import static org.opensearch.test.OpenSearchTestCase.randomDouble; import static org.opensearch.test.OpenSearchTestCase.randomIntBetween; import static org.opensearch.test.OpenSearchTestCase.randomLong; import static org.opensearch.test.OpenSearchTestCase.randomLongBetween; @@ -63,9 +64,9 @@ public static List generateQueryInsightRecords(int lower, int MetricType.LATENCY, randomLongBetween(1000, 10000), MetricType.CPU, - randomDouble(), - MetricType.JVM, - randomDouble() + randomLongBetween(1000, 10000), + MetricType.MEMORY, + randomLongBetween(1000, 10000) ); Map phaseLatencyMap = new HashMap<>(); @@ -186,4 +187,19 @@ public static boolean checkRecordsEqualsWithoutOrder( } return true; } + + public static void registerAllQueryInsightsSettings(ClusterSettings clusterSettings) { + clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_ENABLED); + clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_SIZE); + clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_WINDOW_SIZE); + clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_EXPORTER_SETTINGS); + clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_CPU_QUERIES_ENABLED); + clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_CPU_QUERIES_SIZE); + clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_CPU_QUERIES_WINDOW_SIZE); + clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_CPU_EXPORTER_SETTINGS); + clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_MEMORY_QUERIES_ENABLED); + clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_MEMORY_QUERIES_SIZE); + clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_MEMORY_QUERIES_WINDOW_SIZE); + clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_MEMORY_EXPORTER_SETTINGS); + } } diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListenerTests.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListenerTests.java index b794a2e4b8608..86de44c680188 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListenerTests.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListenerTests.java @@ -13,23 +13,28 @@ import org.opensearch.action.search.SearchRequestContext; import org.opensearch.action.search.SearchTask; import org.opensearch.action.search.SearchType; +import org.opensearch.action.support.replication.ClusterStateCreationUtils; +import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.collect.Tuple; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.concurrent.ThreadContext; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.core.tasks.TaskId; +import org.opensearch.plugin.insights.QueryInsightsTestUtils; import org.opensearch.plugin.insights.core.service.QueryInsightsService; import org.opensearch.plugin.insights.core.service.TopQueriesService; import org.opensearch.plugin.insights.rules.model.Attribute; import org.opensearch.plugin.insights.rules.model.MetricType; import org.opensearch.plugin.insights.rules.model.SearchQueryRecord; -import org.opensearch.plugin.insights.settings.QueryInsightsSettings; import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.opensearch.search.aggregations.support.ValueType; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.tasks.Task; import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; import org.junit.Before; @@ -41,6 +46,7 @@ import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.Phaser; +import java.util.concurrent.TimeUnit; import org.mockito.ArgumentCaptor; @@ -59,7 +65,7 @@ public class QueryInsightsListenerTests extends OpenSearchTestCase { private final SearchRequest searchRequest = mock(SearchRequest.class); private final QueryInsightsService queryInsightsService = mock(QueryInsightsService.class); private final TopQueriesService topQueriesService = mock(TopQueriesService.class); - private final ThreadPool threadPool = mock(ThreadPool.class); + private final ThreadPool threadPool = new TestThreadPool("QueryInsightsThreadPool"); private ClusterService clusterService; @Before @@ -67,16 +73,22 @@ public void setup() { Settings.Builder settingsBuilder = Settings.builder(); Settings settings = settingsBuilder.build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_ENABLED); - clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_SIZE); - clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_WINDOW_SIZE); - clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, null); + QueryInsightsTestUtils.registerAllQueryInsightsSettings(clusterSettings); + ClusterState state = ClusterStateCreationUtils.stateWithActivePrimary("test", true, 1 + randomInt(3), randomInt(2)); + clusterService = ClusterServiceUtils.createClusterService(threadPool, state.getNodes().getLocalNode(), clusterSettings); + ClusterServiceUtils.setState(clusterService, state); when(queryInsightsService.isCollectionEnabled(MetricType.LATENCY)).thenReturn(true); when(queryInsightsService.getTopQueriesService(MetricType.LATENCY)).thenReturn(topQueriesService); ThreadContext threadContext = new ThreadContext(Settings.EMPTY); - threadContext.setHeaders(new Tuple<>(Collections.singletonMap(Task.X_OPAQUE_ID, "userLabel"), new HashMap<>())); - when(threadPool.getThreadContext()).thenReturn(threadContext); + threadPool.getThreadContext().setHeaders(new Tuple<>(Collections.singletonMap(Task.X_OPAQUE_ID, "userLabel"), new HashMap<>())); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + IOUtils.close(clusterService); + ThreadPool.terminate(threadPool, 10, TimeUnit.SECONDS); } @SuppressWarnings("unchecked") @@ -87,7 +99,14 @@ public void testOnRequestEnd() throws InterruptedException { SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.aggregation(new TermsAggregationBuilder("agg1").userValueTypeHint(ValueType.STRING).field("type.keyword")); searchSourceBuilder.size(0); - SearchTask task = new SearchTask(0, "n/a", "n/a", () -> "test", null, Collections.singletonMap(Task.X_OPAQUE_ID, "userLabel")); + SearchTask task = new SearchTask( + 0, + "n/a", + "n/a", + () -> "test", + TaskId.EMPTY_TASK_ID, + Collections.singletonMap(Task.X_OPAQUE_ID, "userLabel") + ); String[] indices = new String[] { "index-1", "index-2" }; @@ -129,7 +148,14 @@ public void testConcurrentOnRequestEnd() throws InterruptedException { SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.aggregation(new TermsAggregationBuilder("agg1").userValueTypeHint(ValueType.STRING).field("type.keyword")); searchSourceBuilder.size(0); - SearchTask task = new SearchTask(0, "n/a", "n/a", () -> "test", null, Collections.singletonMap(Task.X_OPAQUE_ID, "userLabel")); + SearchTask task = new SearchTask( + 0, + "n/a", + "n/a", + () -> "test", + TaskId.EMPTY_TASK_ID, + Collections.singletonMap(Task.X_OPAQUE_ID, "userLabel") + ); String[] indices = new String[] { "index-1", "index-2" }; @@ -184,7 +210,7 @@ public void testSetEnabled() { when(queryInsightsService.isCollectionEnabled(MetricType.LATENCY)).thenReturn(false); when(queryInsightsService.isCollectionEnabled(MetricType.CPU)).thenReturn(false); - when(queryInsightsService.isCollectionEnabled(MetricType.JVM)).thenReturn(false); + when(queryInsightsService.isCollectionEnabled(MetricType.MEMORY)).thenReturn(false); queryInsightsListener.setEnableTopQueries(MetricType.LATENCY, false); assertFalse(queryInsightsListener.isEnabled()); } diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/service/QueryInsightsServiceTests.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/service/QueryInsightsServiceTests.java index 428f615ce2f90..75a5768f50681 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/service/QueryInsightsServiceTests.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/service/QueryInsightsServiceTests.java @@ -34,11 +34,11 @@ public void setup() { Settings.Builder settingsBuilder = Settings.builder(); Settings settings = settingsBuilder.build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_EXPORTER_SETTINGS); + QueryInsightsTestUtils.registerAllQueryInsightsSettings(clusterSettings); queryInsightsService = new QueryInsightsService(clusterSettings, threadPool, client); queryInsightsService.enableCollection(MetricType.LATENCY, true); queryInsightsService.enableCollection(MetricType.CPU, true); - queryInsightsService.enableCollection(MetricType.JVM, true); + queryInsightsService.enableCollection(MetricType.MEMORY, true); } public void testAddRecordToLimitAndDrain() { diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecordTests.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecordTests.java index 793d5878e2300..ad45b53ec5363 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecordTests.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecordTests.java @@ -39,7 +39,7 @@ public void testSerializationAndEquals() throws Exception { public void testAllMetricTypes() { Set allMetrics = MetricType.allMetricTypes(); - Set expected = new HashSet<>(Arrays.asList(MetricType.LATENCY, MetricType.CPU, MetricType.JVM)); + Set expected = new HashSet<>(Arrays.asList(MetricType.LATENCY, MetricType.CPU, MetricType.MEMORY)); assertEquals(expected, allMetrics); } diff --git a/plugins/repository-azure/build.gradle b/plugins/repository-azure/build.gradle index 81444b637e90c..c2bf9e07e294e 100644 --- a/plugins/repository-azure/build.gradle +++ b/plugins/repository-azure/build.gradle @@ -44,10 +44,11 @@ opensearchplugin { } dependencies { - api 'com.azure:azure-core:1.47.0' + api 'com.azure:azure-core:1.49.1' api 'com.azure:azure-json:1.1.0' + api 'com.azure:azure-xml:1.0.0' api 'com.azure:azure-storage-common:12.21.2' - api 'com.azure:azure-core-http-netty:1.12.8' + api 'com.azure:azure-core-http-netty:1.15.1' api "io.netty:netty-codec-dns:${versions.netty}" api "io.netty:netty-codec-socks:${versions.netty}" api "io.netty:netty-codec-http2:${versions.netty}" diff --git a/plugins/repository-azure/licenses/azure-core-1.47.0.jar.sha1 b/plugins/repository-azure/licenses/azure-core-1.47.0.jar.sha1 deleted file mode 100644 index 42e35aacc63b1..0000000000000 --- a/plugins/repository-azure/licenses/azure-core-1.47.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6b300175826f0bb0916fca2fa5f70885b716e93f \ No newline at end of file diff --git a/plugins/repository-azure/licenses/azure-core-1.49.1.jar.sha1 b/plugins/repository-azure/licenses/azure-core-1.49.1.jar.sha1 new file mode 100644 index 0000000000000..d487c08c26e94 --- /dev/null +++ b/plugins/repository-azure/licenses/azure-core-1.49.1.jar.sha1 @@ -0,0 +1 @@ +a7c44282eaa0f5a3be4b920d6a057509adfe8674 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/azure-core-http-netty-1.12.8.jar.sha1 b/plugins/repository-azure/licenses/azure-core-http-netty-1.12.8.jar.sha1 deleted file mode 100644 index e6ee1dec64641..0000000000000 --- a/plugins/repository-azure/licenses/azure-core-http-netty-1.12.8.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -511ed2d02afb0f43f029df3d10ff80d2d3539f05 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/azure-core-http-netty-1.15.1.jar.sha1 b/plugins/repository-azure/licenses/azure-core-http-netty-1.15.1.jar.sha1 new file mode 100644 index 0000000000000..3a0747a0daacb --- /dev/null +++ b/plugins/repository-azure/licenses/azure-core-http-netty-1.15.1.jar.sha1 @@ -0,0 +1 @@ +036f7466a521aa99c79a491a9cf20444667df78b \ No newline at end of file diff --git a/plugins/repository-azure/licenses/azure-xml-1.0.0.jar.sha1 b/plugins/repository-azure/licenses/azure-xml-1.0.0.jar.sha1 new file mode 100644 index 0000000000000..798ec5d95c6ac --- /dev/null +++ b/plugins/repository-azure/licenses/azure-xml-1.0.0.jar.sha1 @@ -0,0 +1 @@ +ba584703bd47e9e789343ee3332f0f5a64f7f187 \ No newline at end of file diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsEnabledSanityIT.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsEnabledSanityIT.java index 90143d907cd99..b0582624e21d5 100644 --- a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsEnabledSanityIT.java +++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsEnabledSanityIT.java @@ -23,10 +23,13 @@ import java.util.Arrays; import java.util.Collection; import java.util.List; +import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Supplier; import java.util.stream.Collectors; +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.common.Attributes; import io.opentelemetry.sdk.metrics.data.DoublePointData; import io.opentelemetry.sdk.metrics.data.MetricData; import io.opentelemetry.sdk.metrics.internal.data.ImmutableExponentialHistogramPointData; @@ -147,6 +150,36 @@ public void testGauge() throws Exception { } + public void testGaugeWithValueAndTagSupplier() throws Exception { + String metricName = "test-gauge"; + MetricsRegistry metricsRegistry = internalCluster().getInstance(MetricsRegistry.class); + InMemorySingletonMetricsExporter.INSTANCE.reset(); + Tags tags = Tags.create().addTag("test", "integ-test"); + final AtomicInteger testValue = new AtomicInteger(0); + Supplier valueProvider = () -> { + return TaggedMeasurement.create(Double.valueOf(testValue.incrementAndGet()), tags); + }; + Closeable gaugeCloseable = metricsRegistry.createGauge(metricName, "test", "ms", valueProvider); + // Sleep for about 2.2s to wait for metrics to be published. + Thread.sleep(2200); + + InMemorySingletonMetricsExporter exporter = InMemorySingletonMetricsExporter.INSTANCE; + + assertTrue(getMaxObservableGaugeValue(exporter, metricName) >= 2.0); + + gaugeCloseable.close(); + double observableGaugeValueAfterStop = getMaxObservableGaugeValue(exporter, metricName); + + Map, Object> attributes = getMetricAttributes(exporter, metricName); + + assertEquals("integ-test", attributes.get(AttributeKey.stringKey("test"))); + + // Sleep for about 1.2s to wait for metrics to see that closed observableGauge shouldn't execute the callable. + Thread.sleep(1200); + assertEquals(observableGaugeValueAfterStop, getMaxObservableGaugeValue(exporter, metricName), 0.0); + + } + private static double getMaxObservableGaugeValue(InMemorySingletonMetricsExporter exporter, String metricName) { List dataPoints = exporter.getFinishedMetricItems() .stream() @@ -159,6 +192,15 @@ private static double getMaxObservableGaugeValue(InMemorySingletonMetricsExporte return totalValue; } + private static Map, Object> getMetricAttributes(InMemorySingletonMetricsExporter exporter, String metricName) { + List dataPoints = exporter.getFinishedMetricItems() + .stream() + .filter(a -> a.getName().contains(metricName)) + .collect(Collectors.toList()); + Attributes attributes = dataPoints.get(0).getDoubleGaugeData().getPoints().stream().findAny().get().getAttributes(); + return attributes.asMap(); + } + @After public void reset() { InMemorySingletonMetricsExporter.INSTANCE.reset(); diff --git a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java index 6fe08040d7af5..3258e91738ba6 100644 --- a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java +++ b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java @@ -101,6 +101,17 @@ public Closeable createGauge(String name, String description, String unit, Suppl return () -> doubleObservableGauge.close(); } + @Override + public Closeable createGauge(String name, String description, String unit, Supplier value) { + ObservableDoubleGauge doubleObservableGauge = AccessController.doPrivileged( + (PrivilegedAction) () -> otelMeter.gaugeBuilder(name) + .setUnit(unit) + .setDescription(description) + .buildWithCallback(record -> record.record(value.get().getValue(), OTelAttributesConverter.convert(value.get().getTags()))) + ); + return () -> doubleObservableGauge.close(); + } + @Override public void close() throws IOException { meterProvider.close(); diff --git a/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetryTests.java b/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetryTests.java index 2e89a3c488d5c..794cafc1fb608 100644 --- a/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetryTests.java +++ b/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetryTests.java @@ -180,4 +180,34 @@ public void testGauge() throws Exception { closeable.close(); verify(observableDoubleGauge).close(); } + + @SuppressWarnings({ "rawtypes", "unchecked" }) + public void testGaugeWithValueAndTagsSupplier() throws Exception { + String observableGaugeName = "test-gauge"; + String description = "test"; + String unit = "1"; + Meter mockMeter = mock(Meter.class); + OpenTelemetry mockOpenTelemetry = mock(OpenTelemetry.class); + ObservableDoubleGauge observableDoubleGauge = mock(ObservableDoubleGauge.class); + DoubleGaugeBuilder mockOTelDoubleGaugeBuilder = mock(DoubleGaugeBuilder.class); + MeterProvider meterProvider = mock(MeterProvider.class); + when(meterProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME)).thenReturn(mockMeter); + MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + meterProvider + ); + when(mockMeter.gaugeBuilder(Mockito.contains(observableGaugeName))).thenReturn(mockOTelDoubleGaugeBuilder); + when(mockOTelDoubleGaugeBuilder.setDescription(description)).thenReturn(mockOTelDoubleGaugeBuilder); + when(mockOTelDoubleGaugeBuilder.setUnit(unit)).thenReturn(mockOTelDoubleGaugeBuilder); + when(mockOTelDoubleGaugeBuilder.buildWithCallback(any(Consumer.class))).thenReturn(observableDoubleGauge); + + Closeable closeable = metricsTelemetry.createGauge( + observableGaugeName, + description, + unit, + () -> TaggedMeasurement.create(1.0, Tags.EMPTY) + ); + closeable.close(); + verify(observableDoubleGauge).close(); + } } diff --git a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java index 9bf4a4b1e18f1..f0fc05c595d6f 100644 --- a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java @@ -51,6 +51,7 @@ import org.opensearch.core.action.ActionListener; import org.opensearch.core.action.ShardOperationFailedException; import org.opensearch.core.index.shard.ShardId; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; import org.opensearch.search.SearchPhaseResult; import org.opensearch.search.SearchShardTarget; import org.opensearch.search.internal.AliasFilter; @@ -469,6 +470,10 @@ private void onRequestEnd(SearchRequestContext searchRequestContext) { this.searchRequestContext.getSearchRequestOperationsListener().onRequestEnd(this, searchRequestContext); } + private void onRequestFailure(SearchRequestContext searchRequestContext) { + this.searchRequestContext.getSearchRequestOperationsListener().onRequestFailure(this, searchRequestContext); + } + private void executePhase(SearchPhase phase) { Span phaseSpan = tracer.startSpan(SpanCreationContext.server().name("[phase/" + phase.getName() + "]")); try (final SpanScope scope = tracer.withSpanInScope(phaseSpan)) { @@ -507,6 +512,7 @@ ShardSearchFailure[] buildShardFailures() { private void onShardFailure(final int shardIndex, @Nullable SearchShardTarget shard, final SearchShardIterator shardIt, Exception e) { // we always add the shard failure for a specific shard instance // we do make sure to clean it on a successful response from a shard + setPhaseResourceUsages(); onShardFailure(shardIndex, shard, e); SearchShardTarget nextShard = FailAwareWeightedRouting.getInstance() .findNext(shardIt, clusterState, e, () -> totalOps.incrementAndGet()); @@ -618,9 +624,15 @@ protected void onShardResult(Result result, SearchShardIterator shardIt) { if (logger.isTraceEnabled()) { logger.trace("got first-phase result from {}", result != null ? result.getSearchShardTarget() : null); } + this.setPhaseResourceUsages(); results.consumeResult(result, () -> onShardResultConsumed(result, shardIt)); } + public void setPhaseResourceUsages() { + TaskResourceInfo taskResourceUsage = searchRequestContext.getTaskResourceUsageSupplier().get(); + searchRequestContext.recordPhaseResourceUsage(taskResourceUsage); + } + private void onShardResultConsumed(Result result, SearchShardIterator shardIt) { successfulOps.incrementAndGet(); // clean a previous error on this shard group (note, this code will be serialized on the same shardIndex value level @@ -751,6 +763,7 @@ public void sendSearchResponse(InternalSearchResponse internalSearchResponse, At @Override public final void onPhaseFailure(SearchPhase phase, String msg, Throwable cause) { + setPhaseResourceUsages(); if (currentPhaseHasLifecycle) { this.searchRequestContext.getSearchRequestOperationsListener().onPhaseFailure(this, cause); } @@ -780,6 +793,7 @@ private void raisePhaseFailure(SearchPhaseExecutionException exception) { }); } Releasables.close(releasables); + onRequestFailure(searchRequestContext); listener.onFailure(exception); } diff --git a/server/src/main/java/org/opensearch/action/search/FetchSearchPhase.java b/server/src/main/java/org/opensearch/action/search/FetchSearchPhase.java index ebb2f33f8f37d..2ad7f8a29896c 100644 --- a/server/src/main/java/org/opensearch/action/search/FetchSearchPhase.java +++ b/server/src/main/java/org/opensearch/action/search/FetchSearchPhase.java @@ -240,6 +240,7 @@ private void executeFetch( public void innerOnResponse(FetchSearchResult result) { try { progressListener.notifyFetchResult(shardIndex); + context.setPhaseResourceUsages(); counter.onResult(result); } catch (Exception e) { context.onPhaseFailure(FetchSearchPhase.this, "", e); @@ -254,6 +255,7 @@ public void onFailure(Exception e) { e ); progressListener.notifyFetchFailure(shardIndex, shardTarget, e); + context.setPhaseResourceUsages(); counter.onFailure(shardIndex, shardTarget, e); } finally { // the search context might not be cleared on the node where the fetch was executed for example diff --git a/server/src/main/java/org/opensearch/action/search/SearchPhaseContext.java b/server/src/main/java/org/opensearch/action/search/SearchPhaseContext.java index df451e0745e3c..55f2a22749e70 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchPhaseContext.java +++ b/server/src/main/java/org/opensearch/action/search/SearchPhaseContext.java @@ -150,4 +150,9 @@ default void sendReleaseSearchContext( * Registers a {@link Releasable} that will be closed when the search request finishes or fails. */ void addReleasable(Releasable releasable); + + /** + * Set the resource usage info for this phase + */ + void setPhaseResourceUsages(); } diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java b/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java index 5b133ba0554f4..111d9c64550b3 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java @@ -8,13 +8,20 @@ package org.opensearch.action.search; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.search.TotalHits; import org.opensearch.common.annotation.InternalApi; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; +import java.util.ArrayList; import java.util.EnumMap; import java.util.HashMap; +import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.function.Supplier; /** * This class holds request-level context for search queries at the coordinator node @@ -23,6 +30,7 @@ */ @InternalApi public class SearchRequestContext { + private static final Logger logger = LogManager.getLogger(); private final SearchRequestOperationsListener searchRequestOperationsListener; private long absoluteStartNanos; private final Map phaseTookMap; @@ -30,13 +38,21 @@ public class SearchRequestContext { private final EnumMap shardStats; private final SearchRequest searchRequest; - - SearchRequestContext(final SearchRequestOperationsListener searchRequestOperationsListener, final SearchRequest searchRequest) { + private final LinkedBlockingQueue phaseResourceUsage; + private final Supplier taskResourceUsageSupplier; + + SearchRequestContext( + final SearchRequestOperationsListener searchRequestOperationsListener, + final SearchRequest searchRequest, + final Supplier taskResourceUsageSupplier + ) { this.searchRequestOperationsListener = searchRequestOperationsListener; this.absoluteStartNanos = System.nanoTime(); this.phaseTookMap = new HashMap<>(); this.shardStats = new EnumMap<>(ShardStatsFieldNames.class); this.searchRequest = searchRequest; + this.phaseResourceUsage = new LinkedBlockingQueue<>(); + this.taskResourceUsageSupplier = taskResourceUsageSupplier; } SearchRequestOperationsListener getSearchRequestOperationsListener() { @@ -108,6 +124,20 @@ String formattedShardStats() { } } + public Supplier getTaskResourceUsageSupplier() { + return taskResourceUsageSupplier; + } + + public void recordPhaseResourceUsage(TaskResourceInfo usage) { + if (usage != null) { + this.phaseResourceUsage.add(usage); + } + } + + public List getPhaseResourceUsage() { + return new ArrayList<>(phaseResourceUsage); + } + public SearchRequest getRequest() { return searchRequest; } diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequestOperationsListener.java b/server/src/main/java/org/opensearch/action/search/SearchRequestOperationsListener.java index b944572cef122..61f19977ae5ce 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequestOperationsListener.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequestOperationsListener.java @@ -51,6 +51,8 @@ protected void onRequestStart(SearchRequestContext searchRequestContext) {} protected void onRequestEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext) {} + protected void onRequestFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext) {} + protected boolean isEnabled(SearchRequest searchRequest) { return isEnabled(); } @@ -133,6 +135,17 @@ public void onRequestEnd(SearchPhaseContext context, SearchRequestContext search } } + @Override + public void onRequestFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext) { + for (SearchRequestOperationsListener listener : listeners) { + try { + listener.onRequestFailure(context, searchRequestContext); + } catch (Exception e) { + logger.warn(() -> new ParameterizedMessage("onRequestFailure listener [{}] failed", listener), e); + } + } + } + public List getListeners() { return listeners; } diff --git a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java index 143b01af3f62f..6e380775355a2 100644 --- a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java @@ -87,6 +87,7 @@ import org.opensearch.search.profile.SearchProfileShardResults; import org.opensearch.tasks.CancellableTask; import org.opensearch.tasks.Task; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.telemetry.metrics.MetricsRegistry; import org.opensearch.telemetry.tracing.Span; import org.opensearch.telemetry.tracing.SpanBuilder; @@ -186,6 +187,7 @@ public class TransportSearchAction extends HandledTransportAction) SearchRequest::new); this.client = client; @@ -224,6 +227,7 @@ public TransportSearchAction( clusterService.getClusterSettings() .addSettingsUpdateConsumer(SEARCH_QUERY_METRICS_ENABLED_SETTING, this::setSearchQueryMetricsEnabled); this.tracer = tracer; + this.taskResourceTrackingService = taskResourceTrackingService; } private void setSearchQueryMetricsEnabled(boolean searchQueryMetricsEnabled) { @@ -451,7 +455,11 @@ private void executeRequest( logger, TraceableSearchRequestOperationsListener.create(tracer, requestSpan) ); - SearchRequestContext searchRequestContext = new SearchRequestContext(requestOperationsListeners, originalSearchRequest); + SearchRequestContext searchRequestContext = new SearchRequestContext( + requestOperationsListeners, + originalSearchRequest, + taskResourceTrackingService::getTaskResourceUsageFromThreadContext + ); searchRequestContext.getSearchRequestOperationsListener().onRequestStart(searchRequestContext); PipelinedRequest searchRequest; diff --git a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java index 987a3e3ffa7d3..7fa63ae8abc62 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java @@ -39,6 +39,7 @@ import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.io.IOUtils; import java.io.Closeable; @@ -52,6 +53,7 @@ import java.util.Set; import static org.opensearch.cluster.coordination.Coordinator.ZEN1_BWC_TERM; +import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled; /** @@ -79,6 +81,7 @@ public class CoordinationState { private VotingConfiguration lastPublishedConfiguration; private VoteCollection publishVotes; private final boolean isRemoteStateEnabled; + private final boolean isRemotePublicationEnabled; public CoordinationState( DiscoveryNode localNode, @@ -102,6 +105,12 @@ public CoordinationState( .getLastAcceptedConfiguration(); this.publishVotes = new VoteCollection(); this.isRemoteStateEnabled = isRemoteStoreClusterStateEnabled(settings); + this.isRemotePublicationEnabled = FeatureFlags.isEnabled(REMOTE_PUBLICATION_EXPERIMENTAL) + && localNode.isRemoteStatePublicationEnabled(); + } + + public boolean isRemotePublicationEnabled() { + return isRemotePublicationEnabled; } public long getCurrentTerm() { diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 4d371cba45e03..bbfa265d50bcf 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -86,6 +86,7 @@ import org.opensearch.discovery.PeerFinder; import org.opensearch.discovery.SeedHostsProvider; import org.opensearch.discovery.SeedHostsResolver; +import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.monitor.NodeHealthService; import org.opensearch.monitor.StatusInfo; import org.opensearch.node.remotestore.RemoteStoreNodeService; @@ -210,7 +211,8 @@ public Coordinator( NodeHealthService nodeHealthService, PersistedStateRegistry persistedStateRegistry, RemoteStoreNodeService remoteStoreNodeService, - ClusterManagerMetrics clusterManagerMetrics + ClusterManagerMetrics clusterManagerMetrics, + RemoteClusterStateService remoteClusterStateService ) { this.settings = settings; this.transportService = transportService; @@ -262,7 +264,8 @@ public Coordinator( transportService, namedWriteableRegistry, this::handlePublishRequest, - this::handleApplyCommit + this::handleApplyCommit, + remoteClusterStateService ); this.leaderChecker = new LeaderChecker( settings, @@ -1331,7 +1334,9 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) + clusterState; final PublicationTransportHandler.PublicationContext publicationContext = publicationHandler.newPublicationContext( - clusterChangedEvent + clusterChangedEvent, + coordinationState.get().isRemotePublicationEnabled(), + persistedStateRegistry ); final PublishRequest publishRequest = coordinationState.get().handleClientValue(clusterState); diff --git a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java index 1fdaeead0d28d..36eabd51ffda1 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java @@ -40,6 +40,7 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.Diff; import org.opensearch.cluster.IncompatibleClusterStateVersionException; +import org.opensearch.cluster.coordination.PersistedStateRegistry.PersistedStateType; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.core.action.ActionListener; @@ -47,6 +48,9 @@ import org.opensearch.core.common.io.stream.NamedWriteableRegistry; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.transport.TransportResponse; +import org.opensearch.gateway.GatewayMetaState.RemotePersistedState; +import org.opensearch.gateway.remote.ClusterMetadataManifest; +import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.BytesTransportRequest; import org.opensearch.transport.TransportChannel; @@ -74,6 +78,7 @@ public class PublicationTransportHandler { private static final Logger logger = LogManager.getLogger(PublicationTransportHandler.class); public static final String PUBLISH_STATE_ACTION_NAME = "internal:cluster/coordination/publish_state"; + public static final String PUBLISH_REMOTE_STATE_ACTION_NAME = "internal:cluster/coordination/publish_remote_state"; public static final String COMMIT_STATE_ACTION_NAME = "internal:cluster/coordination/commit_state"; private final TransportService transportService; @@ -97,16 +102,19 @@ public class PublicationTransportHandler { private final TransportRequestOptions stateRequestOptions = TransportRequestOptions.builder() .withType(TransportRequestOptions.Type.STATE) .build(); + private final RemoteClusterStateService remoteClusterStateService; public PublicationTransportHandler( TransportService transportService, NamedWriteableRegistry namedWriteableRegistry, Function handlePublishRequest, - BiConsumer> handleApplyCommit + BiConsumer> handleApplyCommit, + RemoteClusterStateService remoteClusterStateService ) { this.transportService = transportService; this.namedWriteableRegistry = namedWriteableRegistry; this.handlePublishRequest = handlePublishRequest; + this.remoteClusterStateService = remoteClusterStateService; transportService.registerRequestHandler( PUBLISH_STATE_ACTION_NAME, @@ -117,6 +125,15 @@ public PublicationTransportHandler( (request, channel, task) -> channel.sendResponse(handleIncomingPublishRequest(request)) ); + transportService.registerRequestHandler( + PUBLISH_REMOTE_STATE_ACTION_NAME, + ThreadPool.Names.GENERIC, + false, + false, + RemotePublishRequest::new, + (request, channel, task) -> channel.sendResponse(handleIncomingRemotePublishRequest(request)) + ); + transportService.registerRequestHandler( COMMIT_STATE_ACTION_NAME, ThreadPool.Names.GENERIC, @@ -211,6 +228,74 @@ private PublishWithJoinResponse handleIncomingPublishRequest(BytesTransportReque } } + // package private for testing + PublishWithJoinResponse handleIncomingRemotePublishRequest(RemotePublishRequest request) throws IOException { + if (transportService.getLocalNode().equals(request.getSourceNode())) { + return acceptRemoteStateOnLocalNode(request); + } + // TODO Make cluster state download non-blocking: https://github.com/opensearch-project/OpenSearch/issues/14102 + ClusterMetadataManifest manifest = remoteClusterStateService.getClusterMetadataManifestByFileName( + request.getClusterUUID(), + request.getManifestFile() + ); + if (manifest == null) { + throw new IllegalStateException("Publication failed as manifest was not found for " + request); + } + boolean applyFullState = false; + final ClusterState lastSeen = lastSeenClusterState.get(); + if (lastSeen == null) { + logger.debug(() -> "Diff cannot be applied as there is no last cluster state"); + applyFullState = true; + } else if (manifest.getDiffManifest() == null) { + logger.trace(() -> "There is no diff in the manifest"); + applyFullState = true; + } else if (manifest.getDiffManifest().getFromStateUUID().equals(lastSeen.stateUUID()) == false) { + logger.debug(() -> "Last cluster state not compatible with the diff"); + applyFullState = true; + } + + if (applyFullState == true) { + logger.debug( + () -> new ParameterizedMessage( + "Downloading full cluster state for term {}, version {}, stateUUID {}", + manifest.getClusterTerm(), + manifest.getStateVersion(), + manifest.getStateUUID() + ) + ); + ClusterState clusterState = remoteClusterStateService.getClusterStateForManifest( + request.getClusterName(), + manifest, + transportService.getLocalNode().getId(), + true + ); + fullClusterStateReceivedCount.incrementAndGet(); + final PublishWithJoinResponse response = acceptState(clusterState); + lastSeenClusterState.set(clusterState); + return response; + } else { + logger.debug( + () -> new ParameterizedMessage( + "Downloading diff cluster state for term {}, version {}, previousUUID {}, current UUID {}", + manifest.getClusterTerm(), + manifest.getStateVersion(), + manifest.getDiffManifest().getFromStateUUID(), + manifest.getStateUUID() + ) + ); + ClusterState clusterState = remoteClusterStateService.getClusterStateUsingDiff( + request.getClusterName(), + manifest, + lastSeen, + transportService.getLocalNode().getId() + ); + compatibleClusterStateDiffReceivedCount.incrementAndGet(); + final PublishWithJoinResponse response = acceptState(clusterState); + lastSeenClusterState.compareAndSet(lastSeen, clusterState); + return response; + } + } + private PublishWithJoinResponse acceptState(ClusterState incomingState) { // if the state is coming from the current node, use original request instead (see currentPublishRequestToSelf for explanation) if (transportService.getLocalNode().equals(incomingState.nodes().getClusterManagerNode())) { @@ -224,8 +309,35 @@ private PublishWithJoinResponse acceptState(ClusterState incomingState) { return handlePublishRequest.apply(new PublishRequest(incomingState)); } - public PublicationContext newPublicationContext(ClusterChangedEvent clusterChangedEvent) { - final PublicationContext publicationContext = new PublicationContext(clusterChangedEvent); + private PublishWithJoinResponse acceptRemoteStateOnLocalNode(RemotePublishRequest remotePublishRequest) { + final PublishRequest publishRequest = currentPublishRequestToSelf.get(); + if (publishRequest == null + || publishRequest.getAcceptedState().coordinationMetadata().term() != remotePublishRequest.term + || publishRequest.getAcceptedState().version() != remotePublishRequest.version) { + logger.debug( + () -> new ParameterizedMessage( + "Publication failure for current publish request : {} and remote publish request: {}", + publishRequest, + remotePublishRequest + ) + ); + throw new IllegalStateException("publication to self failed for " + remotePublishRequest); + } + PublishWithJoinResponse publishWithJoinResponse = handlePublishRequest.apply(publishRequest); + lastSeenClusterState.set(publishRequest.getAcceptedState()); + return publishWithJoinResponse; + } + + public PublicationContext newPublicationContext( + ClusterChangedEvent clusterChangedEvent, + boolean isRemotePublicationEnabled, + PersistedStateRegistry persistedStateRegistry + ) { + final PublicationContext publicationContext = new PublicationContext( + clusterChangedEvent, + isRemotePublicationEnabled, + persistedStateRegistry + ); // Build the serializations we expect to need now, early in the process, so that an error during serialization fails the publication // straight away. This isn't watertight since we send diffs on a best-effort basis and may fall back to sending a full state (and @@ -234,6 +346,16 @@ public PublicationContext newPublicationContext(ClusterChangedEvent clusterChang return publicationContext; } + // package private for testing + void setCurrentPublishRequestToSelf(PublishRequest publishRequest) { + this.currentPublishRequestToSelf.set(publishRequest); + } + + // package private for testing + void setLastSeenClusterState(ClusterState clusterState) { + this.lastSeenClusterState.set(clusterState); + } + private static BytesReference serializeFullClusterState(ClusterState clusterState, Version nodeVersion) throws IOException { final BytesReference serializedState = CompressedStreamUtils.createCompressedStream(nodeVersion, stream -> { stream.writeBoolean(true); @@ -270,12 +392,20 @@ public class PublicationContext { private final boolean sendFullVersion; private final Map serializedStates = new HashMap<>(); private final Map serializedDiffs = new HashMap<>(); + private final boolean sendRemoteState; + private final PersistedStateRegistry persistedStateRegistry; - PublicationContext(ClusterChangedEvent clusterChangedEvent) { + PublicationContext( + ClusterChangedEvent clusterChangedEvent, + boolean isRemotePublicationEnabled, + PersistedStateRegistry persistedStateRegistry + ) { discoveryNodes = clusterChangedEvent.state().nodes(); newState = clusterChangedEvent.state(); previousState = clusterChangedEvent.previousState(); sendFullVersion = previousState.getBlocks().disableStatePersistence(); + sendRemoteState = isRemotePublicationEnabled; + this.persistedStateRegistry = persistedStateRegistry; } void buildDiffAndSerializeStates() { @@ -339,7 +469,11 @@ public void onFailure(Exception e) { } else { responseActionListener = listener; } - if (sendFullVersion || previousState.nodes().nodeExists(destination) == false) { + // TODO Decide to send remote state before starting publication by checking remote publication on all nodes + if (sendRemoteState && destination.isRemoteStatePublicationEnabled()) { + logger.trace("sending remote cluster state version [{}] to [{}]", newState.version(), destination); + sendRemoteClusterState(destination, publishRequest.getAcceptedState(), responseActionListener); + } else if (sendFullVersion || previousState.nodes().nodeExists(destination) == false) { logger.trace("sending full cluster state version [{}] to [{}]", newState.version(), destination); sendFullClusterState(destination, responseActionListener); } else { @@ -384,6 +518,61 @@ public String executor() { ); } + private void sendRemoteClusterState( + final DiscoveryNode destination, + final ClusterState clusterState, + final ActionListener listener + ) { + try { + final String manifestFileName = ((RemotePersistedState) persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE)) + .getLastUploadedManifestFile(); + final RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + discoveryNodes.getLocalNode(), + clusterState.term(), + clusterState.getVersion(), + clusterState.getClusterName().value(), + clusterState.metadata().clusterUUID(), + manifestFileName + ); + final Consumer transportExceptionHandler = exp -> { + logger.debug(() -> new ParameterizedMessage("failed to send remote cluster state to {}", destination), exp); + listener.onFailure(exp); + }; + final TransportResponseHandler responseHandler = new TransportResponseHandler<>() { + + @Override + public PublishWithJoinResponse read(StreamInput in) throws IOException { + return new PublishWithJoinResponse(in); + } + + @Override + public void handleResponse(PublishWithJoinResponse response) { + listener.onResponse(response); + } + + @Override + public void handleException(TransportException exp) { + transportExceptionHandler.accept(exp); + } + + @Override + public String executor() { + return ThreadPool.Names.GENERIC; + } + }; + transportService.sendRequest( + destination, + PUBLISH_REMOTE_STATE_ACTION_NAME, + remotePublishRequest, + stateRequestOptions, + responseHandler + ); + } catch (Exception e) { + logger.warn(() -> new ParameterizedMessage("error sending remote cluster state to {}", destination), e); + listener.onFailure(e); + } + } + private void sendFullClusterState(DiscoveryNode destination, ActionListener listener) { BytesReference bytes = serializedStates.get(destination.getVersion()); if (bytes == null) { diff --git a/server/src/main/java/org/opensearch/cluster/coordination/RemotePublishRequest.java b/server/src/main/java/org/opensearch/cluster/coordination/RemotePublishRequest.java new file mode 100644 index 0000000000000..9461c5ee63627 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/coordination/RemotePublishRequest.java @@ -0,0 +1,85 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.coordination; + +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; + +import java.io.IOException; + +/** + * Send the publish request with the remote cluster state details + * @opensearch.internal + */ +public class RemotePublishRequest extends TermVersionRequest { + + private final String clusterName; + private final String clusterUUID; + private final String manifestFile; + + public RemotePublishRequest( + DiscoveryNode sourceNode, + long term, + long version, + String clusterName, + String clusterUUID, + String manifestFile + ) { + super(sourceNode, term, version); + this.clusterName = clusterName; + this.clusterUUID = clusterUUID; + this.manifestFile = manifestFile; + } + + public RemotePublishRequest(StreamInput in) throws IOException { + super(in); + this.clusterName = in.readString(); + this.clusterUUID = in.readString(); + this.manifestFile = in.readString(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(clusterName); + out.writeString(clusterUUID); + out.writeString(manifestFile); + } + + @Override + public String toString() { + return "RemotePublishRequest{" + + "term=" + + term + + ", version=" + + version + + ", clusterName=" + + clusterName + + ", clusterUUID=" + + clusterUUID + + ", sourceNode=" + + sourceNode + + ", manifestFile=" + + manifestFile + + '}'; + } + + public String getClusterName() { + return clusterName; + } + + public String getClusterUUID() { + return clusterUUID; + } + + public String getManifestFile() { + return manifestFile; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java index 93d6f6b0e4d22..ac158416d0c3b 100644 --- a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java +++ b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java @@ -63,7 +63,9 @@ import java.util.stream.Stream; import static org.opensearch.node.NodeRoleSettings.NODE_ROLES_SETTING; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; /** * A discovery node represents a node that is part of the cluster. @@ -520,6 +522,18 @@ public boolean isRemoteStoreNode() { return this.getAttributes().keySet().stream().anyMatch(key -> key.startsWith(REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX)); } + /** + * Returns whether remote cluster state publication is enabled on this node + * @return true if the node contains remote cluster state node attribute and remote routing table node attribute + */ + public boolean isRemoteStatePublicationEnabled() { + return this.getAttributes() + .keySet() + .stream() + .anyMatch(key -> (key.equals(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY))) + && this.getAttributes().keySet().stream().anyMatch(key -> key.equals(REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY)); + } + /** * Returns a set of all the roles that the node has. The roles are returned in sorted order by the role name. *

diff --git a/server/src/main/java/org/opensearch/common/util/concurrent/ThreadContext.java b/server/src/main/java/org/opensearch/common/util/concurrent/ThreadContext.java index 6580b0e0085ef..906a27e9f398c 100644 --- a/server/src/main/java/org/opensearch/common/util/concurrent/ThreadContext.java +++ b/server/src/main/java/org/opensearch/common/util/concurrent/ThreadContext.java @@ -483,6 +483,16 @@ public void addResponseHeader(final String key, final String value) { addResponseHeader(key, value, v -> v); } + /** + * Update the {@code value} for the specified {@code key} + * + * @param key the header name + * @param value the header value + */ + public void updateResponseHeader(final String key, final String value) { + updateResponseHeader(key, value, v -> v); + } + /** * Add the {@code value} for the specified {@code key} with the specified {@code uniqueValue} used for de-duplication. Any duplicate * {@code value} after applying {@code uniqueValue} is ignored. @@ -492,7 +502,19 @@ public void addResponseHeader(final String key, final String value) { * @param uniqueValue the function that produces de-duplication values */ public void addResponseHeader(final String key, final String value, final Function uniqueValue) { - threadLocal.set(threadLocal.get().putResponse(key, value, uniqueValue, maxWarningHeaderCount, maxWarningHeaderSize)); + threadLocal.set(threadLocal.get().putResponse(key, value, uniqueValue, maxWarningHeaderCount, maxWarningHeaderSize, false)); + } + + /** + * Update the {@code value} for the specified {@code key} with the specified {@code uniqueValue} used for de-duplication. Any duplicate + * {@code value} after applying {@code uniqueValue} is ignored. + * + * @param key the header name + * @param value the header value + * @param uniqueValue the function that produces de-duplication values + */ + public void updateResponseHeader(final String key, final String value, final Function uniqueValue) { + threadLocal.set(threadLocal.get().putResponse(key, value, uniqueValue, maxWarningHeaderCount, maxWarningHeaderSize, true)); } /** @@ -717,7 +739,8 @@ private ThreadContextStruct putResponse( final String value, final Function uniqueValue, final int maxWarningHeaderCount, - final long maxWarningHeaderSize + final long maxWarningHeaderSize, + final boolean replaceExistingKey ) { assert value != null; long newWarningHeaderSize = warningHeadersSize; @@ -759,8 +782,13 @@ private ThreadContextStruct putResponse( if (existingValues.contains(uniqueValue.apply(value))) { return this; } - // preserve insertion order - final Set newValues = Stream.concat(existingValues.stream(), Stream.of(value)).collect(LINKED_HASH_SET_COLLECTOR); + Set newValues; + if (replaceExistingKey) { + newValues = Stream.of(value).collect(LINKED_HASH_SET_COLLECTOR); + } else { + // preserve insertion order + newValues = Stream.concat(existingValues.stream(), Stream.of(value)).collect(LINKED_HASH_SET_COLLECTOR); + } newResponseHeaders = new HashMap<>(responseHeaders); newResponseHeaders.put(key, Collections.unmodifiableSet(newValues)); } else { diff --git a/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java b/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java index 538dea5b2e60b..922e23b849d49 100644 --- a/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java +++ b/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java @@ -53,6 +53,7 @@ import org.opensearch.core.common.io.stream.NamedWriteableRegistry; import org.opensearch.core.common.transport.TransportAddress; import org.opensearch.gateway.GatewayMetaState; +import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.monitor.NodeHealthService; import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.plugins.DiscoveryPlugin; @@ -135,7 +136,8 @@ public DiscoveryModule( NodeHealthService nodeHealthService, PersistedStateRegistry persistedStateRegistry, RemoteStoreNodeService remoteStoreNodeService, - ClusterManagerMetrics clusterManagerMetrics + ClusterManagerMetrics clusterManagerMetrics, + RemoteClusterStateService remoteClusterStateService ) { final Collection> joinValidators = new ArrayList<>(); final Map> hostProviders = new HashMap<>(); @@ -214,7 +216,8 @@ public DiscoveryModule( nodeHealthService, persistedStateRegistry, remoteStoreNodeService, - clusterManagerMetrics + clusterManagerMetrics, + remoteClusterStateService ); } else { throw new IllegalArgumentException("Unknown discovery type [" + discoveryType + "]"); diff --git a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java index 24183f2d2675f..811a5161a6f6e 100644 --- a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java +++ b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java @@ -65,6 +65,7 @@ import org.opensearch.env.NodeMetadata; import org.opensearch.gateway.remote.ClusterMetadataManifest; import org.opensearch.gateway.remote.RemoteClusterStateService; +import org.opensearch.gateway.remote.model.RemoteClusterStateManifestInfo; import org.opensearch.index.recovery.RemoteStoreRestoreService; import org.opensearch.index.recovery.RemoteStoreRestoreService.RemoteRestoreResult; import org.opensearch.node.Node; @@ -666,6 +667,8 @@ public static class RemotePersistedState implements PersistedState { private ClusterState lastAcceptedState; private ClusterMetadataManifest lastAcceptedManifest; + + private String lastUploadedManifestFile; private final RemoteClusterStateService remoteClusterStateService; private String previousClusterUUID; @@ -691,10 +694,14 @@ public void setCurrentTerm(long currentTerm) { // But for RemotePersistedState, the state is only pushed by the active cluster. So this method is not required. } + public String getLastUploadedManifestFile() { + return lastUploadedManifestFile; + } + @Override public void setLastAcceptedState(ClusterState clusterState) { try { - final ClusterMetadataManifest manifest; + final RemoteClusterStateManifestInfo manifestDetails; if (shouldWriteFullClusterState(clusterState)) { final Optional latestManifest = remoteClusterStateService.getLatestClusterMetadataManifest( clusterState.getClusterName().value(), @@ -712,15 +719,21 @@ public void setLastAcceptedState(ClusterState clusterState) { clusterState.metadata().clusterUUID() ); } - manifest = remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID); + manifestDetails = remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID); } else { assert verifyManifestAndClusterState(lastAcceptedManifest, lastAcceptedState) == true : "Previous manifest and previous ClusterState are not in sync"; - manifest = remoteClusterStateService.writeIncrementalMetadata(lastAcceptedState, clusterState, lastAcceptedManifest); + manifestDetails = remoteClusterStateService.writeIncrementalMetadata( + lastAcceptedState, + clusterState, + lastAcceptedManifest + ); } - assert verifyManifestAndClusterState(manifest, clusterState) == true : "Manifest and ClusterState are not in sync"; - lastAcceptedManifest = manifest; + assert verifyManifestAndClusterState(manifestDetails.getClusterMetadataManifest(), clusterState) == true + : "Manifest and ClusterState are not in sync"; + lastAcceptedManifest = manifestDetails.getClusterMetadataManifest(); lastAcceptedState = clusterState; + lastUploadedManifestFile = manifestDetails.getManifestFileName(); } catch (Exception e) { remoteClusterStateService.writeMetadataFailed(); handleExceptionOnWrite(e); @@ -768,12 +781,13 @@ public void markLastAcceptedStateAsCommitted() { metadataBuilder.clusterUUIDCommitted(true); clusterState = ClusterState.builder(lastAcceptedState).metadata(metadataBuilder).build(); } - final ClusterMetadataManifest committedManifest = remoteClusterStateService.markLastStateAsCommitted( + final RemoteClusterStateManifestInfo committedManifestDetails = remoteClusterStateService.markLastStateAsCommitted( clusterState, lastAcceptedManifest ); - lastAcceptedManifest = committedManifest; + lastAcceptedManifest = committedManifestDetails.getClusterMetadataManifest(); lastAcceptedState = clusterState; + lastUploadedManifestFile = committedManifestDetails.getManifestFileName(); } catch (Exception e) { handleExceptionOnWrite(e); } diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java index 175727a0093c6..90c80ccdf78a1 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java +++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java @@ -332,6 +332,11 @@ public Map getCustomMetadataMap() { return uploadedCustomMetadataMap; } + // TODO https://github.com/opensearch-project/OpenSearch/pull/14089 + public ClusterStateDiffManifest getDiffManifest() { + return new ClusterStateDiffManifest(); + } + public boolean hasMetadataAttributesFiles() { return uploadedCoordinationMetadata != null || uploadedSettingsMetadata != null diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterStateDiffManifest.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterStateDiffManifest.java new file mode 100644 index 0000000000000..d59c6042e7834 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterStateDiffManifest.java @@ -0,0 +1,22 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +/** + * Manifest of diff between two cluster states + * + * @opensearch.internal + */ +public class ClusterStateDiffManifest { + + // TODO https://github.com/opensearch-project/OpenSearch/pull/14089 + public String getFromStateUUID() { + return null; + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 6bd876b4f511c..240b00e67af0c 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -36,6 +36,7 @@ import org.opensearch.core.xcontent.ToXContent; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadataAttribute; +import org.opensearch.gateway.remote.model.RemoteClusterStateManifestInfo; import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.translog.transfer.BlobStoreTransferService; import org.opensearch.node.Node; @@ -268,7 +269,7 @@ public RemoteClusterStateService( * @return A manifest object which contains the details of uploaded entity metadata. */ @Nullable - public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, String previousClusterUUID) throws IOException { + public RemoteClusterStateManifestInfo writeFullMetadata(ClusterState clusterState, String previousClusterUUID) throws IOException { final long startTimeNanos = relativeTimeNanosSupplier.getAsLong(); if (clusterState.nodes().isLocalNodeElectedClusterManager() == false) { logger.error("Local node is not elected cluster manager. Exiting"); @@ -284,7 +285,7 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri true, true ); - final ClusterMetadataManifest manifest = uploadManifest( + final RemoteClusterStateManifestInfo manifestDetails = uploadManifest( clusterState, uploadedMetadataResults.uploadedIndexMetadata, previousClusterUUID, @@ -311,7 +312,7 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri uploadedMetadataResults.uploadedIndexMetadata.size() ); } - return manifest; + return manifestDetails; } /** @@ -322,7 +323,7 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri * @return The uploaded ClusterMetadataManifest file */ @Nullable - public ClusterMetadataManifest writeIncrementalMetadata( + public RemoteClusterStateManifestInfo writeIncrementalMetadata( ClusterState previousClusterState, ClusterState clusterState, ClusterMetadataManifest previousManifest @@ -404,7 +405,7 @@ public ClusterMetadataManifest writeIncrementalMetadata( customsToBeDeletedFromRemote.keySet().forEach(allUploadedCustomMap::remove); indicesToBeDeletedFromRemote.keySet().forEach(allUploadedIndexMetadata::remove); - final ClusterMetadataManifest manifest = uploadManifest( + final RemoteClusterStateManifestInfo manifestDetails = uploadManifest( clusterState, new ArrayList<>(allUploadedIndexMetadata.values()), previousManifest.getPreviousClusterUUID(), @@ -422,7 +423,7 @@ public ClusterMetadataManifest writeIncrementalMetadata( remoteStateStats.stateTook(durationMillis); ParameterizedMessage clusterStateUploadTimeMessage = new ParameterizedMessage( CLUSTER_STATE_UPLOAD_TIME_LOG_STRING, - manifest.getStateVersion(), + manifestDetails.getClusterMetadataManifest().getStateVersion(), durationMillis ); ParameterizedMessage metadataUpdateMessage = new ParameterizedMessage( @@ -444,7 +445,7 @@ public ClusterMetadataManifest writeIncrementalMetadata( } else { logger.info("{}; {}", clusterStateUploadTimeMessage, metadataUpdateMessage); } - return manifest; + return manifestDetails; } private UploadedMetadataResults writeMetadataInParallel( @@ -724,7 +725,7 @@ public RemoteClusterStateCleanupManager getCleanupManager() { } @Nullable - public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterState, ClusterMetadataManifest previousManifest) + public RemoteClusterStateManifestInfo markLastStateAsCommitted(ClusterState clusterState, ClusterMetadataManifest previousManifest) throws IOException { assert clusterState != null : "Last accepted cluster state is not set"; if (clusterState.nodes().isLocalNodeElectedClusterManager() == false) { @@ -732,7 +733,7 @@ public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterStat return null; } assert previousManifest != null : "Last cluster metadata manifest is not set"; - ClusterMetadataManifest committedManifest = uploadManifest( + RemoteClusterStateManifestInfo committedManifestDetails = uploadManifest( clusterState, previousManifest.getIndices(), previousManifest.getPreviousClusterUUID(), @@ -742,11 +743,11 @@ public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterStat previousManifest.getCustomMetadataMap(), true ); - if (!previousManifest.isClusterUUIDCommitted() && committedManifest.isClusterUUIDCommitted()) { - remoteClusterStateCleanupManager.deleteStaleClusterUUIDs(clusterState, committedManifest); + if (!previousManifest.isClusterUUIDCommitted() && committedManifestDetails.getClusterMetadataManifest().isClusterUUIDCommitted()) { + remoteClusterStateCleanupManager.deleteStaleClusterUUIDs(clusterState, committedManifestDetails.getClusterMetadataManifest()); } - return committedManifest; + return committedManifestDetails; } @Override @@ -773,7 +774,7 @@ public void start() { this.remoteRoutingTableService.ifPresent(RemoteRoutingTableService::start); } - private ClusterMetadataManifest uploadManifest( + private RemoteClusterStateManifestInfo uploadManifest( ClusterState clusterState, List uploadedIndexMetadata, String previousClusterUUID, @@ -812,7 +813,7 @@ private ClusterMetadataManifest uploadManifest( new ArrayList<>() ); writeMetadataManifest(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), manifest, manifestFileName); - return manifest; + return new RemoteClusterStateManifestInfo(manifest, manifestFileName); } } @@ -1091,6 +1092,31 @@ public ClusterState getLatestClusterState(String clusterName, String clusterUUID .build(); } + public ClusterState getClusterStateForManifest( + String clusterName, + ClusterMetadataManifest manifest, + String localNodeId, + boolean includeEphemeral + ) { + // TODO https://github.com/opensearch-project/OpenSearch/pull/14089 + return null; + } + + public ClusterState getClusterStateUsingDiff( + String clusterName, + ClusterMetadataManifest manifest, + ClusterState previousClusterState, + String localNodeId + ) { + // TODO https://github.com/opensearch-project/OpenSearch/pull/14089 + return null; + } + + public ClusterMetadataManifest getClusterMetadataManifestByFileName(String clusterUUID, String manifestFileName) { + // TODO https://github.com/opensearch-project/OpenSearch/pull/14089 + return null; + } + private Metadata getGlobalMetadata(String clusterName, String clusterUUID, ClusterMetadataManifest clusterMetadataManifest) { String globalMetadataFileName = clusterMetadataManifest.getGlobalMetadataFileName(); try { diff --git a/server/src/main/java/org/opensearch/gateway/remote/model/RemoteClusterStateManifestInfo.java b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteClusterStateManifestInfo.java new file mode 100644 index 0000000000000..5d987e5e21e1a --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteClusterStateManifestInfo.java @@ -0,0 +1,33 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote.model; + +import org.opensearch.gateway.remote.ClusterMetadataManifest; + +/** + * A class encapsulating the cluster state manifest and its remote uploaded path + */ +public class RemoteClusterStateManifestInfo { + + private final ClusterMetadataManifest clusterMetadataManifest; + private final String manifestFileName; + + public RemoteClusterStateManifestInfo(final ClusterMetadataManifest manifest, final String manifestFileName) { + this.clusterMetadataManifest = manifest; + this.manifestFileName = manifestFileName; + } + + public ClusterMetadataManifest getClusterMetadataManifest() { + return clusterMetadataManifest; + } + + public String getManifestFileName() { + return manifestFileName; + } +} diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 35b8295f26a08..5f6708e98d38e 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -1196,7 +1196,8 @@ protected Node( fsHealthService, persistedStateRegistry, remoteStoreNodeService, - clusterManagerMetrics + clusterManagerMetrics, + remoteClusterStateService ); final SearchPipelineService searchPipelineService = new SearchPipelineService( clusterService, @@ -1257,7 +1258,8 @@ protected Node( searchModule.getFetchPhase(), responseCollectorService, circuitBreakerService, - searchModule.getIndexSearcherExecutor(threadPool) + searchModule.getIndexSearcherExecutor(threadPool), + taskResourceTrackingService ); final List> tasksExecutors = pluginsService.filterPlugins(PersistentTaskPlugin.class) @@ -1900,7 +1902,8 @@ protected SearchService newSearchService( FetchPhase fetchPhase, ResponseCollectorService responseCollectorService, CircuitBreakerService circuitBreakerService, - Executor indexSearcherExecutor + Executor indexSearcherExecutor, + TaskResourceTrackingService taskResourceTrackingService ) { return new SearchService( clusterService, @@ -1912,7 +1915,8 @@ protected SearchService newSearchService( fetchPhase, responseCollectorService, circuitBreakerService, - indexSearcherExecutor + indexSearcherExecutor, + taskResourceTrackingService ); } diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index eece90f028047..2fccc8be8f34c 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -138,6 +138,7 @@ import org.opensearch.search.sort.SortOrder; import org.opensearch.search.suggest.Suggest; import org.opensearch.search.suggest.completion.CompletionSuggestion; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.Scheduler.Cancellable; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; @@ -339,6 +340,7 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv private final AtomicInteger openPitContexts = new AtomicInteger(); private final String sessionId = UUIDs.randomBase64UUID(); private final Executor indexSearcherExecutor; + private final TaskResourceTrackingService taskResourceTrackingService; public SearchService( ClusterService clusterService, @@ -350,7 +352,8 @@ public SearchService( FetchPhase fetchPhase, ResponseCollectorService responseCollectorService, CircuitBreakerService circuitBreakerService, - Executor indexSearcherExecutor + Executor indexSearcherExecutor, + TaskResourceTrackingService taskResourceTrackingService ) { Settings settings = clusterService.getSettings(); this.threadPool = threadPool; @@ -367,6 +370,7 @@ public SearchService( circuitBreakerService.getBreaker(CircuitBreaker.REQUEST) ); this.indexSearcherExecutor = indexSearcherExecutor; + this.taskResourceTrackingService = taskResourceTrackingService; TimeValue keepAliveInterval = KEEPALIVE_INTERVAL_SETTING.get(settings); setKeepAlives(DEFAULT_KEEPALIVE_SETTING.get(settings), MAX_KEEPALIVE_SETTING.get(settings)); setPitKeepAlives(DEFAULT_KEEPALIVE_SETTING.get(settings), MAX_PIT_KEEPALIVE_SETTING.get(settings)); @@ -559,6 +563,8 @@ private DfsSearchResult executeDfsPhase(ShardSearchRequest request, SearchShardT logger.trace("Dfs phase failed", e); processFailure(readerContext, e); throw e; + } finally { + taskResourceTrackingService.writeTaskResourceUsage(task, clusterService.localNode().getId()); } } @@ -661,6 +667,8 @@ private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, SearchSh logger.trace("Query phase failed", e); processFailure(readerContext, e); throw e; + } finally { + taskResourceTrackingService.writeTaskResourceUsage(task, clusterService.localNode().getId()); } } @@ -706,6 +714,8 @@ public void executeQueryPhase( logger.trace("Query phase failed", e); // we handle the failure in the failure listener below throw e; + } finally { + taskResourceTrackingService.writeTaskResourceUsage(task, clusterService.localNode().getId()); } }, wrapFailureListener(listener, readerContext, markAsUsed)); } @@ -738,6 +748,8 @@ public void executeQueryPhase(QuerySearchRequest request, SearchShardTask task, logger.trace("Query phase failed", e); // we handle the failure in the failure listener below throw e; + } finally { + taskResourceTrackingService.writeTaskResourceUsage(task, clusterService.localNode().getId()); } }, wrapFailureListener(listener, readerContext, markAsUsed)); } @@ -787,6 +799,8 @@ public void executeFetchPhase( logger.trace("Fetch phase failed", e); // we handle the failure in the failure listener below throw e; + } finally { + taskResourceTrackingService.writeTaskResourceUsage(task, clusterService.localNode().getId()); } }, wrapFailureListener(listener, readerContext, markAsUsed)); } @@ -817,6 +831,8 @@ public void executeFetchPhase(ShardFetchRequest request, SearchShardTask task, A assert TransportActions.isShardNotAvailableException(e) == false : new AssertionError(e); // we handle the failure in the failure listener below throw e; + } finally { + taskResourceTrackingService.writeTaskResourceUsage(task, clusterService.localNode().getId()); } }, wrapFailureListener(listener, readerContext, markAsUsed)); } @@ -1754,6 +1770,7 @@ public CanMatchResponse(boolean canMatch, MinAndMax estimatedMinAndMax) { @Override public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); out.writeBoolean(canMatch); if (out.getVersion().onOrAfter(LegacyESVersion.V_7_6_0)) { out.writeOptionalWriteable(estimatedMinAndMax); diff --git a/server/src/main/java/org/opensearch/tasks/Task.java b/server/src/main/java/org/opensearch/tasks/Task.java index 785abc679ed1d..a366c8b920316 100644 --- a/server/src/main/java/org/opensearch/tasks/Task.java +++ b/server/src/main/java/org/opensearch/tasks/Task.java @@ -476,6 +476,18 @@ public void stopThreadResourceTracking(long threadId, ResourceStatsType statsTyp throw new IllegalStateException("cannot update final values if active thread resource entry is not present"); } + public ThreadResourceInfo getActiveThreadResourceInfo(long threadId, ResourceStatsType statsType) { + final List threadResourceInfoList = resourceStats.get(threadId); + if (threadResourceInfoList != null) { + for (ThreadResourceInfo threadResourceInfo : threadResourceInfoList) { + if (threadResourceInfo.getStatsType() == statsType && threadResourceInfo.isActive()) { + return threadResourceInfo; + } + } + } + return null; + } + /** * Individual tasks can override this if they want to support task resource tracking. We just need to make sure that * the ThreadPool on which the task runs on have runnable wrapper similar to diff --git a/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java b/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java index f32559f6314c0..ca1957cdb1633 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java +++ b/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java @@ -14,6 +14,7 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.ExceptionsHelper; +import org.opensearch.action.search.SearchShardTask; import org.opensearch.common.SuppressForbidden; import org.opensearch.common.inject.Inject; import org.opensearch.common.settings.ClusterSettings; @@ -22,12 +23,23 @@ import org.opensearch.common.util.concurrent.ConcurrentCollections; import org.opensearch.common.util.concurrent.ConcurrentMapLong; import org.opensearch.common.util.concurrent.ThreadContext; +import org.opensearch.common.xcontent.XContentHelper; +import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.tasks.resourcetracker.ResourceStats; +import org.opensearch.core.tasks.resourcetracker.ResourceStatsType; +import org.opensearch.core.tasks.resourcetracker.ResourceUsageInfo; import org.opensearch.core.tasks.resourcetracker.ResourceUsageMetric; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; +import org.opensearch.core.tasks.resourcetracker.TaskResourceUsage; import org.opensearch.core.tasks.resourcetracker.ThreadResourceInfo; +import org.opensearch.core.xcontent.DeprecationHandler; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentParser; import org.opensearch.threadpool.RunnableTaskExecutionListener; import org.opensearch.threadpool.ThreadPool; +import java.io.IOException; import java.lang.management.ManagementFactory; import java.util.ArrayList; import java.util.Collections; @@ -51,6 +63,7 @@ public class TaskResourceTrackingService implements RunnableTaskExecutionListene Setting.Property.NodeScope ); public static final String TASK_ID = "TASK_ID"; + public static final String TASK_RESOURCE_USAGE = "TASK_RESOURCE_USAGE"; private static final ThreadMXBean threadMXBean = (ThreadMXBean) ManagementFactory.getThreadMXBean(); @@ -261,6 +274,86 @@ private ThreadContext.StoredContext addTaskIdToThreadContext(Task task) { return storedContext; } + /** + * Get the current task level resource usage. + * + * @param task {@link SearchShardTask} + * @param nodeId the local nodeId + */ + public void writeTaskResourceUsage(SearchShardTask task, String nodeId) { + try { + // Get resource usages from when the task started + ThreadResourceInfo threadResourceInfo = task.getActiveThreadResourceInfo( + Thread.currentThread().getId(), + ResourceStatsType.WORKER_STATS + ); + if (threadResourceInfo == null) { + return; + } + Map startValues = threadResourceInfo.getResourceUsageInfo().getStatsInfo(); + if (!(startValues.containsKey(ResourceStats.CPU) && startValues.containsKey(ResourceStats.MEMORY))) { + return; + } + // Get current resource usages + ResourceUsageMetric[] endValues = getResourceUsageMetricsForThread(Thread.currentThread().getId()); + long cpu = -1, mem = -1; + for (ResourceUsageMetric endValue : endValues) { + if (endValue.getStats() == ResourceStats.MEMORY) { + mem = endValue.getValue(); + } else if (endValue.getStats() == ResourceStats.CPU) { + cpu = endValue.getValue(); + } + } + if (cpu == -1 || mem == -1) { + logger.debug("Invalid resource usage value, cpu [{}], memory [{}]: ", cpu, mem); + return; + } + + // Build task resource usage info + TaskResourceInfo taskResourceInfo = new TaskResourceInfo.Builder().setAction(task.getAction()) + .setTaskId(task.getId()) + .setParentTaskId(task.getParentTaskId().getId()) + .setNodeId(nodeId) + .setTaskResourceUsage( + new TaskResourceUsage( + cpu - startValues.get(ResourceStats.CPU).getStartValue(), + mem - startValues.get(ResourceStats.MEMORY).getStartValue() + ) + ) + .build(); + // Remove the existing TASK_RESOURCE_USAGE header since it would have come from an earlier phase in the same request. + threadPool.getThreadContext().updateResponseHeader(TASK_RESOURCE_USAGE, taskResourceInfo.toString()); + } catch (Exception e) { + logger.debug("Error during writing task resource usage: ", e); + } + } + + /** + * Get the task resource usages from {@link ThreadContext} + * + * @return {@link TaskResourceInfo} + */ + public TaskResourceInfo getTaskResourceUsageFromThreadContext() { + List taskResourceUsages = threadPool.getThreadContext().getResponseHeaders().get(TASK_RESOURCE_USAGE); + if (taskResourceUsages != null && taskResourceUsages.size() > 0) { + String usage = taskResourceUsages.get(0); + try { + if (usage != null && !usage.isEmpty()) { + XContentParser parser = XContentHelper.createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + new BytesArray(usage), + MediaTypeRegistry.JSON + ); + return TaskResourceInfo.PARSER.apply(parser, null); + } + } catch (IOException e) { + logger.debug("fail to parse phase resource usages: ", e); + } + } + return null; + } + /** * Listener that gets invoked when a task execution completes. */ diff --git a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java index 7dcbf213d6c9d..27336e86e52b0 100644 --- a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java @@ -49,6 +49,8 @@ import org.opensearch.core.common.breaker.NoopCircuitBreaker; import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; +import org.opensearch.core.tasks.resourcetracker.TaskResourceUsage; import org.opensearch.index.query.MatchAllQueryBuilder; import org.opensearch.index.shard.ShardNotFoundException; import org.opensearch.search.SearchPhaseResult; @@ -87,6 +89,7 @@ import java.util.function.BiFunction; import java.util.stream.IntStream; +import static org.opensearch.tasks.TaskResourceTrackingService.TASK_RESOURCE_USAGE; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.instanceOf; @@ -123,7 +126,8 @@ private AbstractSearchAsyncAction createAction( ArraySearchPhaseResults results, ActionListener listener, final boolean controlled, - final AtomicLong expected + final AtomicLong expected, + final TaskResourceUsage resourceUsage ) { return createAction( request, @@ -133,6 +137,7 @@ private AbstractSearchAsyncAction createAction( false, false, expected, + resourceUsage, new SearchShardIterator(null, null, Collections.emptyList(), null) ); } @@ -145,6 +150,7 @@ private AbstractSearchAsyncAction createAction( final boolean failExecutePhaseOnShard, final boolean catchExceptionWhenExecutePhaseOnShard, final AtomicLong expected, + final TaskResourceUsage resourceUsage, final SearchShardIterator... shards ) { @@ -166,6 +172,14 @@ private AbstractSearchAsyncAction createAction( return null; }; + TaskResourceInfo taskResourceInfo = new TaskResourceInfo.Builder().setTaskResourceUsage(resourceUsage) + .setTaskId(randomLong()) + .setParentTaskId(randomLong()) + .setAction(randomAlphaOfLengthBetween(1, 5)) + .setNodeId(randomAlphaOfLengthBetween(1, 5)) + .build(); + threadPool.getThreadContext().addResponseHeader(TASK_RESOURCE_USAGE, taskResourceInfo.toString()); + return new AbstractSearchAsyncAction( "test", logger, @@ -186,7 +200,8 @@ private AbstractSearchAsyncAction createAction( SearchResponse.Clusters.EMPTY, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(assertingListener), LogManager.getLogger()), - request + request, + () -> null ), NoopTracer.INSTANCE ) { @@ -248,7 +263,8 @@ private void runTestTook(final boolean controlled) { new ArraySearchPhaseResults<>(10), null, controlled, - expected + expected, + new TaskResourceUsage(0, 0) ); final long actual = action.buildTookInMillis(); if (controlled) { @@ -268,7 +284,8 @@ public void testBuildShardSearchTransportRequest() { new ArraySearchPhaseResults<>(10), null, false, - expected + expected, + new TaskResourceUsage(randomLong(), randomLong()) ); String clusterAlias = randomBoolean() ? null : randomAlphaOfLengthBetween(5, 10); SearchShardIterator iterator = new SearchShardIterator( @@ -291,19 +308,39 @@ public void testBuildShardSearchTransportRequest() { public void testBuildSearchResponse() { SearchRequest searchRequest = new SearchRequest().allowPartialSearchResults(randomBoolean()); ArraySearchPhaseResults phaseResults = new ArraySearchPhaseResults<>(10); - AbstractSearchAsyncAction action = createAction(searchRequest, phaseResults, null, false, new AtomicLong()); + TaskResourceUsage taskResourceUsage = new TaskResourceUsage(randomLong(), randomLong()); + AbstractSearchAsyncAction action = createAction( + searchRequest, + phaseResults, + null, + false, + new AtomicLong(), + taskResourceUsage + ); InternalSearchResponse internalSearchResponse = InternalSearchResponse.empty(); SearchResponse searchResponse = action.buildSearchResponse(internalSearchResponse, action.buildShardFailures(), null, null); assertSame(searchResponse.getAggregations(), internalSearchResponse.aggregations()); assertSame(searchResponse.getSuggest(), internalSearchResponse.suggest()); assertSame(searchResponse.getProfileResults(), internalSearchResponse.profile()); assertSame(searchResponse.getHits(), internalSearchResponse.hits()); + List resourceUsages = threadPool.getThreadContext().getResponseHeaders().get(TASK_RESOURCE_USAGE); + assertNotNull(resourceUsages); + assertEquals(1, resourceUsages.size()); + assertTrue(resourceUsages.get(0).contains(Long.toString(taskResourceUsage.getCpuTimeInNanos()))); + assertTrue(resourceUsages.get(0).contains(Long.toString(taskResourceUsage.getMemoryInBytes()))); } public void testBuildSearchResponseAllowPartialFailures() { SearchRequest searchRequest = new SearchRequest().allowPartialSearchResults(true); final ArraySearchPhaseResults queryResult = new ArraySearchPhaseResults<>(10); - AbstractSearchAsyncAction action = createAction(searchRequest, queryResult, null, false, new AtomicLong()); + AbstractSearchAsyncAction action = createAction( + searchRequest, + queryResult, + null, + false, + new AtomicLong(), + new TaskResourceUsage(randomLong(), randomLong()) + ); action.onShardFailure( 0, new SearchShardTarget("node", new ShardId("index", "index-uuid", 0), null, OriginalIndices.NONE), @@ -325,7 +362,14 @@ public void testSendSearchResponseDisallowPartialFailures() { List> nodeLookups = new ArrayList<>(); int numFailures = randomIntBetween(1, 5); ArraySearchPhaseResults phaseResults = phaseResults(requestIds, nodeLookups, numFailures); - AbstractSearchAsyncAction action = createAction(searchRequest, phaseResults, listener, false, new AtomicLong()); + AbstractSearchAsyncAction action = createAction( + searchRequest, + phaseResults, + listener, + false, + new AtomicLong(), + new TaskResourceUsage(randomLong(), randomLong()) + ); for (int i = 0; i < numFailures; i++) { ShardId failureShardId = new ShardId("index", "index-uuid", i); String failureClusterAlias = randomBoolean() ? null : randomAlphaOfLengthBetween(5, 10); @@ -404,7 +448,14 @@ public void testOnPhaseFailure() { Set requestIds = new HashSet<>(); List> nodeLookups = new ArrayList<>(); ArraySearchPhaseResults phaseResults = phaseResults(requestIds, nodeLookups, 0); - AbstractSearchAsyncAction action = createAction(searchRequest, phaseResults, listener, false, new AtomicLong()); + AbstractSearchAsyncAction action = createAction( + searchRequest, + phaseResults, + listener, + false, + new AtomicLong(), + new TaskResourceUsage(randomLong(), randomLong()) + ); action.onPhaseFailure(new SearchPhase("test") { @Override @@ -428,7 +479,14 @@ public void testShardNotAvailableWithDisallowPartialFailures() { ActionListener listener = ActionListener.wrap(response -> fail("onResponse should not be called"), exception::set); int numShards = randomIntBetween(2, 10); ArraySearchPhaseResults phaseResults = new ArraySearchPhaseResults<>(numShards); - AbstractSearchAsyncAction action = createAction(searchRequest, phaseResults, listener, false, new AtomicLong()); + AbstractSearchAsyncAction action = createAction( + searchRequest, + phaseResults, + listener, + false, + new AtomicLong(), + new TaskResourceUsage(randomLong(), randomLong()) + ); // skip one to avoid the "all shards failed" failure. SearchShardIterator skipIterator = new SearchShardIterator(null, null, Collections.emptyList(), null); skipIterator.resetAndSkip(); @@ -450,7 +508,14 @@ public void testShardNotAvailableWithIgnoreUnavailable() { ActionListener listener = ActionListener.wrap(response -> {}, exception::set); int numShards = randomIntBetween(2, 10); ArraySearchPhaseResults phaseResults = new ArraySearchPhaseResults<>(numShards); - AbstractSearchAsyncAction action = createAction(searchRequest, phaseResults, listener, false, new AtomicLong()); + AbstractSearchAsyncAction action = createAction( + searchRequest, + phaseResults, + listener, + false, + new AtomicLong(), + new TaskResourceUsage(randomLong(), randomLong()) + ); // skip one to avoid the "all shards failed" failure. SearchShardIterator skipIterator = new SearchShardIterator(null, null, Collections.emptyList(), null); skipIterator.resetAndSkip(); @@ -521,6 +586,7 @@ public void onFailure(Exception e) { true, false, new AtomicLong(), + new TaskResourceUsage(randomLong(), randomLong()), shards ); action.run(); @@ -568,6 +634,7 @@ public void onFailure(Exception e) { false, false, new AtomicLong(), + new TaskResourceUsage(randomLong(), randomLong()), shards ); action.run(); @@ -620,6 +687,7 @@ public void onFailure(Exception e) { false, catchExceptionWhenExecutePhaseOnShard, new AtomicLong(), + new TaskResourceUsage(randomLong(), randomLong()), shards ); action.run(); @@ -771,7 +839,8 @@ private SearchDfsQueryThenFetchAsyncAction createSearchDfsQueryThenFetchAsyncAct SearchResponse.Clusters.EMPTY, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(searchRequestOperationsListeners, logger), - searchRequest + searchRequest, + () -> null ), NoopTracer.INSTANCE ); @@ -825,7 +894,8 @@ private SearchQueryThenFetchAsyncAction createSearchQueryThenFetchAsyncAction( SearchResponse.Clusters.EMPTY, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(searchRequestOperationsListeners, logger), - searchRequest + searchRequest, + () -> null ), NoopTracer.INSTANCE ) { diff --git a/server/src/test/java/org/opensearch/action/search/CanMatchPreFilterSearchPhaseTests.java b/server/src/test/java/org/opensearch/action/search/CanMatchPreFilterSearchPhaseTests.java index 1881c705fe6b3..bb51aeaeee9dd 100644 --- a/server/src/test/java/org/opensearch/action/search/CanMatchPreFilterSearchPhaseTests.java +++ b/server/src/test/java/org/opensearch/action/search/CanMatchPreFilterSearchPhaseTests.java @@ -170,7 +170,7 @@ public void run() throws IOException { } }, SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, searchRequest), + new SearchRequestContext(searchRequestOperationsListener, searchRequest, () -> null), NoopTracer.INSTANCE ); @@ -268,7 +268,7 @@ public void run() throws IOException { } }, SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, searchRequest), + new SearchRequestContext(searchRequestOperationsListener, searchRequest, () -> null), NoopTracer.INSTANCE ); @@ -366,7 +366,7 @@ public void sendCanMatch( new ArraySearchPhaseResults<>(iter.size()), randomIntBetween(1, 32), SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, searchRequest), + new SearchRequestContext(searchRequestOperationsListener, searchRequest, () -> null), NoopTracer.INSTANCE ) { @Override @@ -396,7 +396,7 @@ protected void executePhaseOnShard( ); }, SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, searchRequest), + new SearchRequestContext(searchRequestOperationsListener, searchRequest, () -> null), NoopTracer.INSTANCE ); @@ -488,7 +488,7 @@ public void run() { } }, SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, searchRequest), + new SearchRequestContext(searchRequestOperationsListener, searchRequest, () -> null), NoopTracer.INSTANCE ); @@ -595,7 +595,7 @@ public void run() { } }, SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, searchRequest), + new SearchRequestContext(searchRequestOperationsListener, searchRequest, () -> null), NoopTracer.INSTANCE ); @@ -658,7 +658,8 @@ public void sendCanMatch( ExecutorService executor = OpenSearchExecutors.newDirectExecutorService(); SearchRequestContext searchRequestContext = new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(assertingListener), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ); SearchPhaseController controller = new SearchPhaseController( diff --git a/server/src/test/java/org/opensearch/action/search/MockSearchPhaseContext.java b/server/src/test/java/org/opensearch/action/search/MockSearchPhaseContext.java index cc10da8fc1f12..2f3e462f741b8 100644 --- a/server/src/test/java/org/opensearch/action/search/MockSearchPhaseContext.java +++ b/server/src/test/java/org/opensearch/action/search/MockSearchPhaseContext.java @@ -182,6 +182,14 @@ public void addReleasable(Releasable releasable) { // Noop } + /** + * Set the resource usage info for this phase + */ + @Override + public void setPhaseResourceUsages() { + // Noop + } + @Override public void execute(Runnable command) { command.run(); diff --git a/server/src/test/java/org/opensearch/action/search/SearchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/SearchAsyncActionTests.java index 35e90ff662b19..8fe2d9af217d5 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchAsyncActionTests.java @@ -162,7 +162,7 @@ public void testSkipSearchShards() throws InterruptedException { new ArraySearchPhaseResults<>(shardsIter.size()), request.getMaxConcurrentShardRequests(), SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, request), + new SearchRequestContext(searchRequestOperationsListener, request, () -> null), NoopTracer.INSTANCE ) { @@ -287,7 +287,7 @@ public void testLimitConcurrentShardRequests() throws InterruptedException { new ArraySearchPhaseResults<>(shardsIter.size()), request.getMaxConcurrentShardRequests(), SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, request), + new SearchRequestContext(searchRequestOperationsListener, request, () -> null), NoopTracer.INSTANCE ) { @@ -409,7 +409,8 @@ public void sendFreeContext(Transport.Connection connection, ShardSearchContextI SearchResponse.Clusters.EMPTY, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(assertingListener), LogManager.getLogger()), - request + request, + () -> null ), NoopTracer.INSTANCE ) { @@ -537,7 +538,8 @@ public void sendFreeContext(Transport.Connection connection, ShardSearchContextI SearchResponse.Clusters.EMPTY, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(assertingListener), LogManager.getLogger()), - request + request, + () -> null ), NoopTracer.INSTANCE ) { @@ -657,7 +659,7 @@ public void testAllowPartialResults() throws InterruptedException { new ArraySearchPhaseResults<>(shardsIter.size()), request.getMaxConcurrentShardRequests(), SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, request), + new SearchRequestContext(searchRequestOperationsListener, request, () -> null), NoopTracer.INSTANCE ) { @Override diff --git a/server/src/test/java/org/opensearch/action/search/SearchQueryThenFetchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/SearchQueryThenFetchAsyncActionTests.java index aefbbe80d5fa1..f6a06a51c7b43 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchQueryThenFetchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchQueryThenFetchAsyncActionTests.java @@ -240,7 +240,8 @@ public void sendExecuteQuery( SearchResponse.Clusters.EMPTY, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(assertingListener), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ), NoopTracer.INSTANCE ) { diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsListenerSupport.java b/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsListenerSupport.java index 0f737e00478cb..fdac91a0e3124 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsListenerSupport.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsListenerSupport.java @@ -25,7 +25,8 @@ default void onPhaseEnd(SearchRequestOperationsListener listener, SearchPhaseCon context, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); } diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestSlowLogTests.java b/server/src/test/java/org/opensearch/action/search/SearchRequestSlowLogTests.java index 91a2552ac3f04..453fc6cd8a74c 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestSlowLogTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestSlowLogTests.java @@ -178,7 +178,8 @@ public void testConcurrentOnRequestEnd() throws InterruptedException { for (int i = 0; i < numRequests; i++) { SearchRequestContext searchRequestContext = new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(searchListenersList, logger), - searchRequest + searchRequest, + () -> null ); searchRequestContext.setAbsoluteStartNanos((i < numRequestsLogged) ? 0 : System.nanoTime()); searchRequestContexts.add(searchRequestContext); @@ -209,7 +210,8 @@ public void testSearchRequestSlowLogHasJsonFields_EmptySearchRequestContext() th SearchPhaseContext searchPhaseContext = new MockSearchPhaseContext(1, searchRequest); SearchRequestContext searchRequestContext = new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ); SearchRequestSlowLog.SearchRequestSlowLogMessage p = new SearchRequestSlowLog.SearchRequestSlowLogMessage( searchPhaseContext, @@ -233,7 +235,8 @@ public void testSearchRequestSlowLogHasJsonFields_NotEmptySearchRequestContext() SearchPhaseContext searchPhaseContext = new MockSearchPhaseContext(1, searchRequest); SearchRequestContext searchRequestContext = new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ); searchRequestContext.updatePhaseTookMap(SearchPhaseName.FETCH.getName(), 10L); searchRequestContext.updatePhaseTookMap(SearchPhaseName.QUERY.getName(), 50L); @@ -262,7 +265,8 @@ public void testSearchRequestSlowLogHasJsonFields_PartialContext() throws IOExce SearchPhaseContext searchPhaseContext = new MockSearchPhaseContext(1, searchRequest); SearchRequestContext searchRequestContext = new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ); searchRequestContext.updatePhaseTookMap(SearchPhaseName.FETCH.getName(), 10L); searchRequestContext.updatePhaseTookMap(SearchPhaseName.QUERY.getName(), 50L); @@ -291,7 +295,8 @@ public void testSearchRequestSlowLogSearchContextPrinterToLog() throws IOExcepti SearchPhaseContext searchPhaseContext = new MockSearchPhaseContext(1, searchRequest); SearchRequestContext searchRequestContext = new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ); searchRequestContext.updatePhaseTookMap(SearchPhaseName.FETCH.getName(), 10L); searchRequestContext.updatePhaseTookMap(SearchPhaseName.QUERY.getName(), 50L); diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestStatsTests.java b/server/src/test/java/org/opensearch/action/search/SearchRequestStatsTests.java index fb9b26e3f3ad1..1af3eb2738a58 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestStatsTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestStatsTests.java @@ -60,7 +60,8 @@ public void testSearchRequestStats() { ctx, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertEquals(0, testRequestStats.getPhaseCurrent(searchPhaseName)); @@ -120,7 +121,8 @@ public void testSearchRequestStatsOnPhaseEndConcurrently() throws InterruptedExc ctx, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); countDownLatch.countDown(); diff --git a/server/src/test/java/org/opensearch/action/search/SearchResponseMergerTests.java b/server/src/test/java/org/opensearch/action/search/SearchResponseMergerTests.java index ce4d5ca4f7091..0eefa413c1864 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchResponseMergerTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchResponseMergerTests.java @@ -137,7 +137,8 @@ public void testMergeTookInMillis() throws InterruptedException { SearchResponse.Clusters.EMPTY, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertEquals(TimeUnit.NANOSECONDS.toMillis(currentRelativeTime), searchResponse.getTook().millis()); @@ -195,7 +196,8 @@ public void testMergeShardFailures() throws InterruptedException { clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertSame(clusters, mergedResponse.getClusters()); @@ -252,7 +254,8 @@ public void testMergeShardFailuresNullShardTarget() throws InterruptedException clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertSame(clusters, mergedResponse.getClusters()); @@ -304,7 +307,8 @@ public void testMergeShardFailuresNullShardId() throws InterruptedException { SearchResponse.Clusters.EMPTY, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ).getShardFailures(); assertThat(Arrays.asList(shardFailures), containsInAnyOrder(expectedFailures.toArray(ShardSearchFailure.EMPTY_ARRAY))); @@ -344,7 +348,8 @@ public void testMergeProfileResults() throws InterruptedException { clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertSame(clusters, mergedResponse.getClusters()); @@ -412,7 +417,8 @@ public void testMergeCompletionSuggestions() throws InterruptedException { clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertSame(clusters, mergedResponse.getClusters()); @@ -490,7 +496,8 @@ public void testMergeCompletionSuggestionsTieBreak() throws InterruptedException clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertSame(clusters, mergedResponse.getClusters()); @@ -570,7 +577,8 @@ public void testMergeAggs() throws InterruptedException { clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertSame(clusters, mergedResponse.getClusters()); @@ -733,7 +741,8 @@ public void testMergeSearchHits() throws InterruptedException { clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); @@ -799,7 +808,8 @@ public void testMergeNoResponsesAdded() { clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertSame(clusters, response.getClusters()); @@ -878,7 +888,8 @@ public void testMergeEmptySearchHitsWithNonEmpty() { clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertEquals(10, mergedResponse.getHits().getTotalHits().value); @@ -926,7 +937,8 @@ public void testMergeOnlyEmptyHits() { clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertEquals(expectedTotalHits, mergedResponse.getHits().getTotalHits()); diff --git a/server/src/test/java/org/opensearch/action/search/TransportSearchActionTests.java b/server/src/test/java/org/opensearch/action/search/TransportSearchActionTests.java index da19c839f3826..84955d01a59ce 100644 --- a/server/src/test/java/org/opensearch/action/search/TransportSearchActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/TransportSearchActionTests.java @@ -487,7 +487,8 @@ public void testCCSRemoteReduceMergeFails() throws Exception { (r, l) -> setOnce.set(Tuple.tuple(r, l)), new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ) ); if (localIndices == null) { @@ -549,7 +550,8 @@ public void testCCSRemoteReduce() throws Exception { (r, l) -> setOnce.set(Tuple.tuple(r, l)), new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ) ); if (localIndices == null) { @@ -590,7 +592,8 @@ public void testCCSRemoteReduce() throws Exception { (r, l) -> setOnce.set(Tuple.tuple(r, l)), new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ) ); if (localIndices == null) { @@ -652,7 +655,8 @@ public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connecti (r, l) -> setOnce.set(Tuple.tuple(r, l)), new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ) ); if (localIndices == null) { @@ -696,7 +700,8 @@ public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connecti (r, l) -> setOnce.set(Tuple.tuple(r, l)), new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ) ); if (localIndices == null) { @@ -751,7 +756,8 @@ public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connecti (r, l) -> setOnce.set(Tuple.tuple(r, l)), new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ) ); if (localIndices == null) { diff --git a/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java b/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java index bd71aecf89101..e74962dcbba1b 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java @@ -48,6 +48,7 @@ import org.opensearch.gateway.GatewayMetaState.RemotePersistedState; import org.opensearch.gateway.remote.ClusterMetadataManifest; import org.opensearch.gateway.remote.RemoteClusterStateService; +import org.opensearch.gateway.remote.model.RemoteClusterStateManifestInfo; import org.opensearch.repositories.fs.FsRepository; import org.opensearch.test.EqualsHashCodeTestUtils; import org.opensearch.test.OpenSearchTestCase; @@ -944,7 +945,8 @@ public void testHandlePrePublishAndCommitWhenRemoteStateEnabled() throws IOExcep .previousClusterUUID(randomAlphaOfLength(10)) .clusterUUIDCommitted(true) .build(); - Mockito.when(remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID)).thenReturn(manifest); + Mockito.when(remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID)) + .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); final PersistedStateRegistry persistedStateRegistry = persistedStateRegistry(); persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, ps1); @@ -977,6 +979,8 @@ public void testHandlePrePublishAndCommitWhenRemoteStateEnabled() throws IOExcep Mockito.verify(remoteClusterStateService, Mockito.times(1)).writeFullMetadata(clusterState, previousClusterUUID); assertThat(persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedState(), equalTo(clusterState)); + Mockito.when(remoteClusterStateService.markLastStateAsCommitted(any(), any())) + .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); coordinationState.handlePreCommit(); ClusterState committedClusterState = ClusterState.builder(clusterState) .metadata(Metadata.builder(clusterState.metadata()).clusterUUIDCommitted(true).build()) diff --git a/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java b/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java index f84f0326f4a9d..3a7988bcd2bda 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java @@ -274,7 +274,8 @@ protected void onSendRequest( nodeHealthService, persistedStateRegistry, Mockito.mock(RemoteStoreNodeService.class), - new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE), + null ); transportService.start(); transportService.acceptIncomingRequests(); diff --git a/server/src/test/java/org/opensearch/cluster/coordination/PublicationTransportHandlerTests.java b/server/src/test/java/org/opensearch/cluster/coordination/PublicationTransportHandlerTests.java index 6d94054afdea2..08e3f47100d8c 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/PublicationTransportHandlerTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/PublicationTransportHandlerTests.java @@ -37,33 +37,59 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.Diff; import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.gateway.remote.ClusterMetadataManifest; +import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.node.Node; import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.transport.CapturingTransport; import org.opensearch.transport.TransportService; +import org.junit.Before; import java.io.IOException; import java.util.Collections; +import java.util.Optional; +import java.util.function.Function; + +import org.mockito.Mockito; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.when; public class PublicationTransportHandlerTests extends OpenSearchTestCase { - public void testDiffSerializationFailure() { - DeterministicTaskQueue deterministicTaskQueue = new DeterministicTaskQueue( + private static final long TERM = 5; + private static final long VERSION = 5; + private static final String CLUSTER_NAME = "test-cluster"; + private static final String CLUSTER_UUID = "test-cluster-UUID"; + private static final String MANIFEST_FILE = "path/to/manifest"; + private static final String LOCAL_NODE_ID = "localNode"; + + private DeterministicTaskQueue deterministicTaskQueue; + private TransportService transportService; + private DiscoveryNode localNode; + private DiscoveryNode secondNode; + + @Before + public void setup() { + deterministicTaskQueue = new DeterministicTaskQueue( Settings.builder().put(Node.NODE_NAME_SETTING.getKey(), "test").build(), random() ); final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - final DiscoveryNode localNode = new DiscoveryNode("localNode", buildNewFakeTransportAddress(), Version.CURRENT); - final TransportService transportService = new CapturingTransport().createTransportService( + localNode = new DiscoveryNode(LOCAL_NODE_ID, buildNewFakeTransportAddress(), Version.CURRENT); + secondNode = new DiscoveryNode("secondNode", buildNewFakeTransportAddress(), Version.CURRENT); + transportService = new CapturingTransport().createTransportService( Settings.EMPTY, deterministicTaskQueue.getThreadPool(), TransportService.NOOP_TRANSPORT_INTERCEPTOR, @@ -72,14 +98,10 @@ public void testDiffSerializationFailure() { Collections.emptySet(), NoopTracer.INSTANCE ); - final PublicationTransportHandler handler = new PublicationTransportHandler( - transportService, - writableRegistry(), - pu -> null, - (pu, l) -> {} - ); - transportService.start(); - transportService.acceptIncomingRequests(); + } + + public void testDiffSerializationFailure() { + final PublicationTransportHandler handler = getPublicationTransportHandler(p -> null, null); final DiscoveryNode otherNode = new DiscoveryNode("otherNode", buildNewFakeTransportAddress(), Version.CURRENT); final ClusterState clusterState = CoordinationStateTests.clusterState( @@ -111,10 +133,181 @@ public void writeTo(StreamOutput out) throws IOException { OpenSearchException e = expectThrows( OpenSearchException.class, - () -> handler.newPublicationContext(new ClusterChangedEvent("test", unserializableClusterState, clusterState)) + () -> handler.newPublicationContext(new ClusterChangedEvent("test", unserializableClusterState, clusterState), false, null) ); assertNotNull(e.getCause()); assertThat(e.getCause(), instanceOf(IOException.class)); assertThat(e.getCause().getMessage(), containsString("Simulated failure of diff serialization")); } + + public void testHandleIncomingRemotePublishRequestWhenNoCurrentPublishRequest() { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + + PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty()); + Function handlePublishRequest = p -> expectedPublishResponse; + final PublicationTransportHandler handler = getPublicationTransportHandler(handlePublishRequest, remoteClusterStateService); + RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + localNode, + TERM, + VERSION, + CLUSTER_NAME, + CLUSTER_UUID, + MANIFEST_FILE + ); + + IllegalStateException e = expectThrows( + IllegalStateException.class, + () -> handler.handleIncomingRemotePublishRequest(remotePublishRequest) + ); + assertThat(e.getMessage(), containsString("publication to self failed")); + Mockito.verifyNoInteractions(remoteClusterStateService); + } + + public void testHandleIncomingRemotePublishRequestWhenTermMismatch() { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + + PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty()); + Function handlePublishRequest = p -> expectedPublishResponse; + final PublicationTransportHandler handler = getPublicationTransportHandler(handlePublishRequest, remoteClusterStateService); + RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + localNode, + TERM, + VERSION, + CLUSTER_NAME, + CLUSTER_UUID, + MANIFEST_FILE + ); + ClusterState clusterState = buildClusterState(6L, VERSION); + PublishRequest publishRequest = new PublishRequest(clusterState); + handler.setCurrentPublishRequestToSelf(publishRequest); + IllegalStateException e = expectThrows( + IllegalStateException.class, + () -> handler.handleIncomingRemotePublishRequest(remotePublishRequest) + ); + assertThat(e.getMessage(), containsString("publication to self failed")); + Mockito.verifyNoInteractions(remoteClusterStateService); + } + + public void testHandleIncomingRemotePublishRequestWhenVersionMismatch() { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + + PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty()); + Function handlePublishRequest = p -> expectedPublishResponse; + final PublicationTransportHandler handler = getPublicationTransportHandler(handlePublishRequest, remoteClusterStateService); + RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + localNode, + TERM, + VERSION, + CLUSTER_NAME, + CLUSTER_UUID, + MANIFEST_FILE + ); + ClusterState clusterState = buildClusterState(TERM, 11L); + PublishRequest publishRequest = new PublishRequest(clusterState); + handler.setCurrentPublishRequestToSelf(publishRequest); + IllegalStateException e = expectThrows( + IllegalStateException.class, + () -> handler.handleIncomingRemotePublishRequest(remotePublishRequest) + ); + assertThat(e.getMessage(), containsString("publication to self failed")); + Mockito.verifyNoInteractions(remoteClusterStateService); + } + + public void testHandleIncomingRemotePublishRequestForLocalNode() throws IOException { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + + PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty()); + Function handlePublishRequest = p -> expectedPublishResponse; + final PublicationTransportHandler handler = getPublicationTransportHandler(handlePublishRequest, remoteClusterStateService); + RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + localNode, + TERM, + VERSION, + CLUSTER_NAME, + CLUSTER_UUID, + MANIFEST_FILE + ); + ClusterState clusterState = buildClusterState(TERM, VERSION); + PublishRequest publishRequest = new PublishRequest(clusterState); + handler.setCurrentPublishRequestToSelf(publishRequest); + PublishWithJoinResponse publishWithJoinResponse = handler.handleIncomingRemotePublishRequest(remotePublishRequest); + assertThat(publishWithJoinResponse, is(expectedPublishResponse)); + Mockito.verifyNoInteractions(remoteClusterStateService); + } + + public void testHandleIncomingRemotePublishRequestWhenManifestNotFound() throws IOException { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + + PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty()); + Function handlePublishRequest = p -> expectedPublishResponse; + final PublicationTransportHandler handler = getPublicationTransportHandler(handlePublishRequest, remoteClusterStateService); + RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + secondNode, + TERM, + VERSION, + CLUSTER_NAME, + CLUSTER_UUID, + MANIFEST_FILE + ); + when(remoteClusterStateService.getClusterMetadataManifestByFileName(CLUSTER_UUID, MANIFEST_FILE)).thenReturn(null); + ClusterState clusterState = buildClusterState(TERM, VERSION); + PublishRequest publishRequest = new PublishRequest(clusterState); + handler.setCurrentPublishRequestToSelf(publishRequest); + IllegalStateException e = expectThrows( + IllegalStateException.class, + () -> handler.handleIncomingRemotePublishRequest(remotePublishRequest) + ); + assertThat(e.getMessage(), containsString("Publication failed as manifest was not found for")); + Mockito.verify(remoteClusterStateService, times(1)).getClusterMetadataManifestByFileName(Mockito.any(), Mockito.any()); + } + + public void testHandleIncomingRemotePublishRequestWhenNoLastSeenState() throws IOException { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + + PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty()); + Function handlePublishRequest = p -> expectedPublishResponse; + final PublicationTransportHandler handler = getPublicationTransportHandler(handlePublishRequest, remoteClusterStateService); + RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + secondNode, + TERM, + VERSION, + CLUSTER_NAME, + CLUSTER_UUID, + MANIFEST_FILE + ); + ClusterMetadataManifest manifest = ClusterMetadataManifest.builder().clusterTerm(TERM).stateVersion(VERSION).build(); + when(remoteClusterStateService.getClusterMetadataManifestByFileName(CLUSTER_UUID, MANIFEST_FILE)).thenReturn(manifest); + when(remoteClusterStateService.getClusterStateForManifest(CLUSTER_NAME, manifest, LOCAL_NODE_ID, true)).thenReturn( + buildClusterState(TERM, VERSION) + ); + ClusterState clusterState = buildClusterState(TERM, VERSION); + PublishRequest publishRequest = new PublishRequest(clusterState); + handler.setCurrentPublishRequestToSelf(publishRequest); + PublishWithJoinResponse publishWithJoinResponse = handler.handleIncomingRemotePublishRequest(remotePublishRequest); + assertThat(publishWithJoinResponse, is(expectedPublishResponse)); + Mockito.verify(remoteClusterStateService, times(1)).getClusterMetadataManifestByFileName(Mockito.any(), Mockito.any()); + } + + private PublicationTransportHandler getPublicationTransportHandler( + Function handlePublishRequest, + RemoteClusterStateService remoteClusterStateService + ) { + final PublicationTransportHandler handler = new PublicationTransportHandler( + transportService, + writableRegistry(), + handlePublishRequest, + (pu, l) -> {}, + remoteClusterStateService + ); + transportService.start(); + transportService.acceptIncomingRequests(); + return handler; + } + + private ClusterState buildClusterState(long term, long version) { + CoordinationMetadata.Builder coordMetadataBuilder = CoordinationMetadata.builder().term(term); + Metadata newMetadata = Metadata.builder().coordinationMetadata(coordMetadataBuilder.build()).build(); + DiscoveryNodes nodes = DiscoveryNodes.builder().add(localNode).add(secondNode).localNodeId(LOCAL_NODE_ID).build(); + return ClusterState.builder(ClusterState.EMPTY_STATE).version(version).metadata(newMetadata).nodes(nodes).build(); + } } diff --git a/server/src/test/java/org/opensearch/common/util/concurrent/ThreadContextTests.java b/server/src/test/java/org/opensearch/common/util/concurrent/ThreadContextTests.java index 10669ca1a805b..4e66575711046 100644 --- a/server/src/test/java/org/opensearch/common/util/concurrent/ThreadContextTests.java +++ b/server/src/test/java/org/opensearch/common/util/concurrent/ThreadContextTests.java @@ -344,11 +344,11 @@ public void testResponseHeaders() { } final String value = HeaderWarning.formatWarning("qux"); - threadContext.addResponseHeader("baz", value, s -> HeaderWarning.extractWarningValueFromWarningHeader(s, false)); + threadContext.updateResponseHeader("baz", value, s -> HeaderWarning.extractWarningValueFromWarningHeader(s, false)); // pretend that another thread created the same response at a different time if (randomBoolean()) { final String duplicateValue = HeaderWarning.formatWarning("qux"); - threadContext.addResponseHeader("baz", duplicateValue, s -> HeaderWarning.extractWarningValueFromWarningHeader(s, false)); + threadContext.updateResponseHeader("baz", duplicateValue, s -> HeaderWarning.extractWarningValueFromWarningHeader(s, false)); } threadContext.addResponseHeader("Warning", "One is the loneliest number"); diff --git a/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java b/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java index 5539b3237c2bf..cd3e8af2a3cd1 100644 --- a/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java +++ b/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java @@ -131,7 +131,8 @@ private DiscoveryModule newModule(Settings settings, List plugi null, new PersistedStateRegistry(), remoteStoreNodeService, - new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE), + null ); } diff --git a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java index 418e6d8de6adb..7b113961fc2c7 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java @@ -69,6 +69,7 @@ import org.opensearch.gateway.remote.ClusterMetadataManifest; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.gateway.remote.RemotePersistenceStats; +import org.opensearch.gateway.remote.model.RemoteClusterStateManifestInfo; import org.opensearch.index.recovery.RemoteStoreRestoreService; import org.opensearch.index.recovery.RemoteStoreRestoreService.RemoteRestoreResult; import org.opensearch.index.remote.RemoteIndexPathUploader; @@ -723,9 +724,11 @@ public void testRemotePersistedState() throws IOException { final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); final ClusterMetadataManifest manifest = ClusterMetadataManifest.builder().clusterTerm(1L).stateVersion(5L).build(); final String previousClusterUUID = "prev-cluster-uuid"; - Mockito.when(remoteClusterStateService.writeFullMetadata(Mockito.any(), Mockito.any())).thenReturn(manifest); + Mockito.when(remoteClusterStateService.writeFullMetadata(Mockito.any(), Mockito.any())) + .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); - Mockito.when(remoteClusterStateService.writeIncrementalMetadata(Mockito.any(), Mockito.any(), Mockito.any())).thenReturn(manifest); + Mockito.when(remoteClusterStateService.writeIncrementalMetadata(Mockito.any(), Mockito.any(), Mockito.any())) + .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); CoordinationState.PersistedState remotePersistedState = new RemotePersistedState(remoteClusterStateService, previousClusterUUID); assertThat(remotePersistedState.getLastAcceptedState(), nullValue()); @@ -754,6 +757,9 @@ public void testRemotePersistedState() throws IOException { assertThat(remotePersistedState.getLastAcceptedState(), equalTo(secondClusterState)); assertThat(remotePersistedState.getCurrentTerm(), equalTo(clusterTerm)); + when(remoteClusterStateService.markLastStateAsCommitted(Mockito.any(), Mockito.any())).thenReturn( + new RemoteClusterStateManifestInfo(manifest, "path/to/manifest") + ); remotePersistedState.markLastAcceptedStateAsCommitted(); Mockito.verify(remoteClusterStateService, times(1)).markLastStateAsCommitted(Mockito.any(), Mockito.any()); @@ -779,9 +785,11 @@ public void testRemotePersistedStateNotCommitted() throws IOException { .build(); Mockito.when(remoteClusterStateService.getLatestClusterMetadataManifest(Mockito.any(), Mockito.any())) .thenReturn(Optional.of(manifest)); - Mockito.when(remoteClusterStateService.writeFullMetadata(Mockito.any(), Mockito.any())).thenReturn(manifest); + Mockito.when(remoteClusterStateService.writeFullMetadata(Mockito.any(), Mockito.any())) + .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); - Mockito.when(remoteClusterStateService.writeIncrementalMetadata(Mockito.any(), Mockito.any(), Mockito.any())).thenReturn(manifest); + Mockito.when(remoteClusterStateService.writeIncrementalMetadata(Mockito.any(), Mockito.any(), Mockito.any())) + .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); CoordinationState.PersistedState remotePersistedState = new RemotePersistedState( remoteClusterStateService, ClusterState.UNKNOWN_UUID diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 4a53770c76d88..890a4b478b502 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -43,6 +43,7 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadataAttribute; +import org.opensearch.gateway.remote.model.RemoteClusterStateManifestInfo; import org.opensearch.index.remote.RemoteIndexPathUploader; import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.indices.IndicesModule; @@ -182,8 +183,11 @@ public void teardown() throws Exception { public void testFailWriteFullMetadataNonClusterManagerNode() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().build(); - final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, randomAlphaOfLength(10)); - Assert.assertThat(manifest, nullValue()); + final RemoteClusterStateManifestInfo manifestDetails = remoteClusterStateService.writeFullMetadata( + clusterState, + randomAlphaOfLength(10) + ); + Assert.assertThat(manifestDetails, nullValue()); } public void testFailInitializationWhenRemoteStateDisabled() { @@ -218,7 +222,8 @@ public void testWriteFullMetadataSuccess() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); mockBlobStoreObjects(); remoteClusterStateService.start(); - final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid"); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid") + .getClusterMetadataManifest(); final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); List indices = List.of(uploadedIndexMetadata); @@ -262,7 +267,8 @@ public void testWriteFullMetadataInParallelSuccess() throws IOException { }).when(container).asyncBlobUpload(writeContextArgumentCaptor.capture(), actionListenerArgumentCaptor.capture()); remoteClusterStateService.start(); - final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid"); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid") + .getClusterMetadataManifest(); final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); List indices = List.of(uploadedIndexMetadata); @@ -401,8 +407,12 @@ public void testWriteFullMetadataInParallelFailureForIndexMetadata() throws IOEx public void testFailWriteIncrementalMetadataNonClusterManagerNode() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().build(); remoteClusterStateService.start(); - final ClusterMetadataManifest manifest = remoteClusterStateService.writeIncrementalMetadata(clusterState, clusterState, null); - Assert.assertThat(manifest, nullValue()); + final RemoteClusterStateManifestInfo manifestDetails = remoteClusterStateService.writeIncrementalMetadata( + clusterState, + clusterState, + null + ); + Assert.assertThat(manifestDetails, nullValue()); assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); } @@ -433,7 +443,7 @@ public void testWriteIncrementalMetadataSuccess() throws IOException { previousClusterState, clusterState, previousManifest - ); + ).getClusterMetadataManifest(); final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); final List indices = List.of(uploadedIndexMetadata); @@ -508,7 +518,7 @@ private void verifyCodecMigrationManifest(int previousCodec) throws IOException previousClusterState, newClusterState, previousManifest - ); + ).getClusterMetadataManifest(); // global metadata is updated assertThat(manifestAfterUpdate.hasMetadataAttributesFiles(), is(true)); @@ -545,7 +555,7 @@ private void verifyWriteIncrementalGlobalMetadataFromOlderCodecSuccess(ClusterMe previousClusterState, clusterState, previousManifest - ); + ).getClusterMetadataManifest(); final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() .codecVersion(3) @@ -736,7 +746,8 @@ public void testCustomMetadataDeletedUpdatedAndAdded() throws IOException { // Initial cluster state with index. final ClusterState initialClusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); remoteClusterStateService.start(); - final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata(initialClusterState, "_na_"); + final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata(initialClusterState, "_na_") + .getClusterMetadataManifest(); ClusterState clusterState1 = ClusterState.builder(initialClusterState) .metadata( @@ -751,7 +762,7 @@ public void testCustomMetadataDeletedUpdatedAndAdded() throws IOException { initialClusterState, clusterState1, initialManifest - ); + ).getClusterMetadataManifest(); // remove custom1 from the cluster state, update custom2, custom3 is at it is, added custom4 ClusterState clusterState2 = ClusterState.builder(initialClusterState) .metadata( @@ -761,7 +772,8 @@ public void testCustomMetadataDeletedUpdatedAndAdded() throws IOException { .putCustom("custom4", new CustomMetadata1("mock_custom_metadata4")) ) .build(); - ClusterMetadataManifest manifest2 = remoteClusterStateService.writeIncrementalMetadata(clusterState1, clusterState2, manifest1); + ClusterMetadataManifest manifest2 = remoteClusterStateService.writeIncrementalMetadata(clusterState1, clusterState2, manifest1) + .getClusterMetadataManifest(); // custom1 is removed assertFalse(manifest2.getCustomMetadataMap().containsKey("custom1")); // custom2 is updated @@ -811,7 +823,8 @@ public void testIndexMetadataDeletedUpdatedAndAdded() throws IOException { // Initial cluster state with index. final ClusterState initialClusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); remoteClusterStateService.start(); - final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata(initialClusterState, "_na_"); + final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata(initialClusterState, "_na_") + .getClusterMetadataManifest(); String initialIndex = "test-index"; Index index1 = new Index("test-index-1", "index-uuid-1"); Index index2 = new Index("test-index-2", "index-uuid-2"); @@ -844,7 +857,7 @@ public void testIndexMetadataDeletedUpdatedAndAdded() throws IOException { initialClusterState, clusterState1, initialManifest - ); + ).getClusterMetadataManifest(); // verify that initial index is removed, and new index are added assertEquals(1, initialManifest.getIndices().size()); assertEquals(2, manifest1.getIndices().size()); @@ -855,7 +868,8 @@ public void testIndexMetadataDeletedUpdatedAndAdded() throws IOException { ClusterState clusterState2 = ClusterState.builder(clusterState1) .metadata(Metadata.builder(clusterState1.getMetadata()).put(indexMetadata1, true).build()) .build(); - ClusterMetadataManifest manifest2 = remoteClusterStateService.writeIncrementalMetadata(clusterState1, clusterState2, manifest1); + ClusterMetadataManifest manifest2 = remoteClusterStateService.writeIncrementalMetadata(clusterState1, clusterState2, manifest1) + .getClusterMetadataManifest(); // index1 is updated assertEquals(2, manifest2.getIndices().size()); assertEquals( @@ -888,7 +902,8 @@ private void verifyMetadataAttributeOnlyUpdated( // Initial cluster state with index. final ClusterState initialClusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); remoteClusterStateService.start(); - final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata(initialClusterState, "_na_"); + final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata(initialClusterState, "_na_") + .getClusterMetadataManifest(); ClusterState newClusterState = clusterStateUpdater.apply(initialClusterState); @@ -899,9 +914,10 @@ private void verifyMetadataAttributeOnlyUpdated( initialClusterState, newClusterState, initialManifest - ); + ).getClusterMetadataManifest(); } else { - manifestAfterMetadataUpdate = remoteClusterStateService.writeFullMetadata(newClusterState, initialClusterState.stateUUID()); + manifestAfterMetadataUpdate = remoteClusterStateService.writeFullMetadata(newClusterState, initialClusterState.stateUUID()) + .getClusterMetadataManifest(); } assertions.accept(initialManifest, manifestAfterMetadataUpdate); @@ -1182,7 +1198,8 @@ public void testMarkLastStateAsCommittedSuccess() throws IOException { List indices = List.of(uploadedIndexMetadata); final ClusterMetadataManifest previousManifest = ClusterMetadataManifest.builder().indices(indices).build(); - final ClusterMetadataManifest manifest = remoteClusterStateService.markLastStateAsCommitted(clusterState, previousManifest); + final ClusterMetadataManifest manifest = remoteClusterStateService.markLastStateAsCommitted(clusterState, previousManifest) + .getClusterMetadataManifest(); final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() .indices(indices) @@ -1287,7 +1304,8 @@ public void testRemoteStateStats() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); mockBlobStoreObjects(); remoteClusterStateService.start(); - final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid"); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid") + .getClusterMetadataManifest(); assertTrue(remoteClusterStateService.getStats() != null); assertEquals(1, remoteClusterStateService.getStats().getSuccessCount()); diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 3848dce5a14ec..9c58fc8fde084 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -2285,7 +2285,8 @@ public void onFailure(final Exception e) { new FetchPhase(Collections.emptyList()), responseCollectorService, new NoneCircuitBreakerService(), - null + null, + new TaskResourceTrackingService(settings, clusterSettings, threadPool) ); SearchPhaseController searchPhaseController = new SearchPhaseController( writableRegistry(), @@ -2320,7 +2321,8 @@ public void onFailure(final Exception e) { ), NoopMetricsRegistry.INSTANCE, searchRequestOperationsCompositeListenerFactory, - NoopTracer.INSTANCE + NoopTracer.INSTANCE, + new TaskResourceTrackingService(settings, clusterSettings, threadPool) ) ); actions.put( @@ -2554,7 +2556,8 @@ public void start(ClusterState initialState) { () -> new StatusInfo(HEALTHY, "healthy-info"), persistedStateRegistry, remoteStoreNodeService, - new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE), + null ); clusterManagerService.setClusterStatePublisher(coordinator); coordinator.start(); diff --git a/server/src/test/java/org/opensearch/tasks/TaskResourceInfoTests.java b/server/src/test/java/org/opensearch/tasks/TaskResourceInfoTests.java new file mode 100644 index 0000000000000..e0bfb8710bbaa --- /dev/null +++ b/server/src/test/java/org/opensearch/tasks/TaskResourceInfoTests.java @@ -0,0 +1,106 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.tasks; + +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; +import org.opensearch.core.tasks.resourcetracker.TaskResourceUsage; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Locale; + +/** + * Test cases for TaskResourceInfo + */ +public class TaskResourceInfoTests extends OpenSearchTestCase { + private final Long cpuUsage = randomNonNegativeLong(); + private final Long memoryUsage = randomNonNegativeLong(); + private final String action = randomAlphaOfLengthBetween(1, 10); + private final Long taskId = randomNonNegativeLong(); + private final Long parentTaskId = randomNonNegativeLong(); + private final String nodeId = randomAlphaOfLengthBetween(1, 10); + private TaskResourceInfo taskResourceInfo; + private TaskResourceUsage taskResourceUsage; + + @Before + public void setUpVariables() { + taskResourceUsage = new TaskResourceUsage(cpuUsage, memoryUsage); + taskResourceInfo = new TaskResourceInfo(action, taskId, parentTaskId, nodeId, taskResourceUsage); + } + + public void testGetters() { + assertEquals(action, taskResourceInfo.getAction()); + assertEquals(taskId.longValue(), taskResourceInfo.getTaskId()); + assertEquals(parentTaskId.longValue(), taskResourceInfo.getParentTaskId()); + assertEquals(nodeId, taskResourceInfo.getNodeId()); + assertEquals(taskResourceUsage, taskResourceInfo.getTaskResourceUsage()); + } + + public void testEqualsAndHashCode() { + TaskResourceInfo taskResourceInfoCopy = new TaskResourceInfo(action, taskId, parentTaskId, nodeId, taskResourceUsage); + assertEquals(taskResourceInfo, taskResourceInfoCopy); + assertEquals(taskResourceInfo.hashCode(), taskResourceInfoCopy.hashCode()); + TaskResourceInfo differentTaskResourceInfo = new TaskResourceInfo( + "differentAction", + taskId, + parentTaskId, + nodeId, + taskResourceUsage + ); + assertNotEquals(taskResourceInfo, differentTaskResourceInfo); + assertNotEquals(taskResourceInfo.hashCode(), differentTaskResourceInfo.hashCode()); + } + + public void testSerialization() throws IOException { + BytesStreamOutput output = new BytesStreamOutput(); + taskResourceInfo.writeTo(output); + StreamInput input = StreamInput.wrap(output.bytes().toBytesRef().bytes); + TaskResourceInfo deserializedTaskResourceInfo = TaskResourceInfo.readFromStream(input); + assertEquals(taskResourceInfo, deserializedTaskResourceInfo); + } + + public void testToString() { + String expectedString = String.format( + Locale.ROOT, + "{\"action\":\"%s\",\"taskId\":%s,\"parentTaskId\":%s,\"nodeId\":\"%s\",\"taskResourceUsage\":{\"cpu_time_in_nanos\":%s,\"memory_in_bytes\":%s}}", + action, + taskId, + parentTaskId, + nodeId, + taskResourceUsage.getCpuTimeInNanos(), + taskResourceUsage.getMemoryInBytes() + ); + assertTrue(expectedString.equals(taskResourceInfo.toString())); + } + + public void testToXContent() throws IOException { + char[] expectedXcontent = String.format( + Locale.ROOT, + "{\"action\":\"%s\",\"taskId\":%s,\"parentTaskId\":%s,\"nodeId\":\"%s\",\"taskResourceUsage\":{\"cpu_time_in_nanos\":%s,\"memory_in_bytes\":%s}}", + action, + taskId, + parentTaskId, + nodeId, + taskResourceUsage.getCpuTimeInNanos(), + taskResourceUsage.getMemoryInBytes() + ).toCharArray(); + + XContentBuilder builder = MediaTypeRegistry.contentBuilder(MediaTypeRegistry.JSON); + char[] xContent = BytesReference.bytes(taskResourceInfo.toXContent(builder, ToXContent.EMPTY_PARAMS)).utf8ToString().toCharArray(); + assertEquals(Arrays.hashCode(expectedXcontent), Arrays.hashCode(xContent)); + } +} diff --git a/server/src/test/java/org/opensearch/tasks/TaskResourceTrackingServiceTests.java b/server/src/test/java/org/opensearch/tasks/TaskResourceTrackingServiceTests.java index 45d438f8d04c9..0c19c331e1510 100644 --- a/server/src/test/java/org/opensearch/tasks/TaskResourceTrackingServiceTests.java +++ b/server/src/test/java/org/opensearch/tasks/TaskResourceTrackingServiceTests.java @@ -9,11 +9,15 @@ package org.opensearch.tasks; import org.opensearch.action.admin.cluster.node.tasks.TransportTasksActionTests; +import org.opensearch.action.search.SearchShardTask; import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.core.tasks.TaskId; +import org.opensearch.core.tasks.resourcetracker.ResourceStatsType; +import org.opensearch.core.tasks.resourcetracker.ResourceUsageMetric; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; import org.opensearch.core.tasks.resourcetracker.ThreadResourceInfo; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; @@ -31,6 +35,7 @@ import static org.opensearch.core.tasks.resourcetracker.ResourceStats.CPU; import static org.opensearch.core.tasks.resourcetracker.ResourceStats.MEMORY; import static org.opensearch.tasks.TaskResourceTrackingService.TASK_ID; +import static org.opensearch.tasks.TaskResourceTrackingService.TASK_RESOURCE_USAGE; public class TaskResourceTrackingServiceTests extends OpenSearchTestCase { @@ -142,6 +147,36 @@ public void testStartingTrackingHandlesMultipleThreadsPerTask() throws Interrupt assertEquals(numTasks, numExecutions); } + public void testWriteTaskResourceUsage() { + SearchShardTask task = new SearchShardTask(1, "test", "test", "task", TaskId.EMPTY_TASK_ID, new HashMap<>()); + taskResourceTrackingService.setTaskResourceTrackingEnabled(true); + taskResourceTrackingService.startTracking(task); + task.startThreadResourceTracking( + Thread.currentThread().getId(), + ResourceStatsType.WORKER_STATS, + new ResourceUsageMetric(CPU, 100), + new ResourceUsageMetric(MEMORY, 100) + ); + taskResourceTrackingService.writeTaskResourceUsage(task, "node_1"); + Map> headers = threadPool.getThreadContext().getResponseHeaders(); + assertEquals(1, headers.size()); + assertTrue(headers.containsKey(TASK_RESOURCE_USAGE)); + } + + public void testGetTaskResourceUsageFromThreadContext() { + String taskResourceUsageJson = + "{\"action\":\"testAction\",\"taskId\":1,\"parentTaskId\":2,\"nodeId\":\"nodeId\",\"taskResourceUsage\":{\"cpu_time_in_nanos\":1000,\"memory_in_bytes\":2000}}"; + threadPool.getThreadContext().addResponseHeader(TASK_RESOURCE_USAGE, taskResourceUsageJson); + TaskResourceInfo result = taskResourceTrackingService.getTaskResourceUsageFromThreadContext(); + assertNotNull(result); + assertEquals("testAction", result.getAction()); + assertEquals(1L, result.getTaskId()); + assertEquals(2L, result.getParentTaskId()); + assertEquals("nodeId", result.getNodeId()); + assertEquals(1000L, result.getTaskResourceUsage().getCpuTimeInNanos()); + assertEquals(2000L, result.getTaskResourceUsage().getMemoryInBytes()); + } + private void verifyThreadContextFixedHeaders(String key, String value) { assertEquals(threadPool.getThreadContext().getHeader(key), value); assertEquals(threadPool.getThreadContext().getTransient(key), value); diff --git a/server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java b/server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java index 6d395085b12ea..ceda373df1357 100644 --- a/server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java +++ b/server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java @@ -11,6 +11,7 @@ import org.opensearch.telemetry.metrics.Counter; import org.opensearch.telemetry.metrics.Histogram; import org.opensearch.telemetry.metrics.MetricsRegistry; +import org.opensearch.telemetry.metrics.TaggedMeasurement; import org.opensearch.telemetry.metrics.tags.Tags; import java.io.Closeable; @@ -66,6 +67,11 @@ public Closeable createGauge(String name, String description, String unit, Suppl return null; } + @Override + public Closeable createGauge(String name, String description, String unit, Supplier value) { + return null; + } + @Override public void close() throws IOException {} } diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java index 1c2270bab1260..b432e5411404e 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java @@ -1184,7 +1184,8 @@ protected Optional getDisruptableMockTransport(Transpo nodeHealthService, persistedStateRegistry, remoteStoreNodeService, - new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE), + null ); clusterManagerService.setClusterStatePublisher(coordinator); final GatewayService gatewayService = new GatewayService( diff --git a/test/framework/src/main/java/org/opensearch/node/MockNode.java b/test/framework/src/main/java/org/opensearch/node/MockNode.java index e6c7e21d5b3ea..19c65ec169d3c 100644 --- a/test/framework/src/main/java/org/opensearch/node/MockNode.java +++ b/test/framework/src/main/java/org/opensearch/node/MockNode.java @@ -60,6 +60,7 @@ import org.opensearch.search.SearchService; import org.opensearch.search.fetch.FetchPhase; import org.opensearch.search.query.QueryPhase; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.telemetry.tracing.Tracer; import org.opensearch.test.MockHttpTransport; import org.opensearch.test.transport.MockTransportService; @@ -155,7 +156,8 @@ protected SearchService newSearchService( FetchPhase fetchPhase, ResponseCollectorService responseCollectorService, CircuitBreakerService circuitBreakerService, - Executor indexSearcherExecutor + Executor indexSearcherExecutor, + TaskResourceTrackingService taskResourceTrackingService ) { if (getPluginsService().filterPlugins(MockSearchService.TestPlugin.class).isEmpty()) { return super.newSearchService( @@ -168,7 +170,8 @@ protected SearchService newSearchService( fetchPhase, responseCollectorService, circuitBreakerService, - indexSearcherExecutor + indexSearcherExecutor, + taskResourceTrackingService ); } return new MockSearchService( @@ -180,7 +183,8 @@ protected SearchService newSearchService( queryPhase, fetchPhase, circuitBreakerService, - indexSearcherExecutor + indexSearcherExecutor, + taskResourceTrackingService ); } diff --git a/test/framework/src/main/java/org/opensearch/search/MockSearchService.java b/test/framework/src/main/java/org/opensearch/search/MockSearchService.java index a0bbcb7be05f9..6c9ace06c8219 100644 --- a/test/framework/src/main/java/org/opensearch/search/MockSearchService.java +++ b/test/framework/src/main/java/org/opensearch/search/MockSearchService.java @@ -42,6 +42,7 @@ import org.opensearch.search.fetch.FetchPhase; import org.opensearch.search.internal.ReaderContext; import org.opensearch.search.query.QueryPhase; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ThreadPool; import java.util.HashMap; @@ -96,7 +97,8 @@ public MockSearchService( QueryPhase queryPhase, FetchPhase fetchPhase, CircuitBreakerService circuitBreakerService, - Executor indexSearcherExecutor + Executor indexSearcherExecutor, + TaskResourceTrackingService taskResourceTrackingService ) { super( clusterService, @@ -108,7 +110,8 @@ public MockSearchService( fetchPhase, null, circuitBreakerService, - indexSearcherExecutor + indexSearcherExecutor, + taskResourceTrackingService ); } diff --git a/test/framework/src/main/java/org/opensearch/test/telemetry/MockTelemetry.java b/test/framework/src/main/java/org/opensearch/test/telemetry/MockTelemetry.java index 4ba130343e889..e9d8ddd06fcba 100644 --- a/test/framework/src/main/java/org/opensearch/test/telemetry/MockTelemetry.java +++ b/test/framework/src/main/java/org/opensearch/test/telemetry/MockTelemetry.java @@ -13,6 +13,7 @@ import org.opensearch.telemetry.metrics.Counter; import org.opensearch.telemetry.metrics.Histogram; import org.opensearch.telemetry.metrics.MetricsTelemetry; +import org.opensearch.telemetry.metrics.TaggedMeasurement; import org.opensearch.telemetry.metrics.noop.NoopCounter; import org.opensearch.telemetry.metrics.noop.NoopHistogram; import org.opensearch.telemetry.metrics.tags.Tags; @@ -62,6 +63,11 @@ public Closeable createGauge(String name, String description, String unit, Suppl return () -> {}; } + @Override + public Closeable createGauge(String name, String description, String unit, Supplier value) { + return () -> {}; + } + @Override public void close() {