Skip to content

Commit 1a939e9

Browse files
[ML] Create and inject APM Inference Metrics (#111293)
We are migrating from in-memory cumulative counter to an Time Series Data Stream delta counter. The goal is to avoid metrics suddenly dropping to zero when a node restarts, hopefully increasing accuracy of the metric. Co-authored-by: Jonathan Buttner <[email protected]>
1 parent 38f301a commit 1a939e9

File tree

12 files changed

+142
-307
lines changed

12 files changed

+142
-307
lines changed

server/src/main/java/org/elasticsearch/inference/ServiceSettings.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
package org.elasticsearch.inference;
1010

1111
import org.elasticsearch.common.io.stream.VersionedNamedWriteable;
12+
import org.elasticsearch.core.Nullable;
1213
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
1314
import org.elasticsearch.xcontent.ToXContentObject;
1415

@@ -48,5 +49,6 @@ default DenseVectorFieldMapper.ElementType elementType() {
4849
* be chosen when initializing a deployment within their service. In this situation, return null.
4950
* @return the model used to perform inference or null if the model is not defined
5051
*/
52+
@Nullable
5153
String modelId();
5254
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.elasticsearch.indices.SystemIndexDescriptor;
2727
import org.elasticsearch.inference.InferenceServiceExtension;
2828
import org.elasticsearch.inference.InferenceServiceRegistry;
29+
import org.elasticsearch.node.PluginComponentBinding;
2930
import org.elasticsearch.plugins.ActionPlugin;
3031
import org.elasticsearch.plugins.ExtensiblePlugin;
3132
import org.elasticsearch.plugins.MapperPlugin;
@@ -84,8 +85,8 @@
8485
import org.elasticsearch.xpack.inference.services.huggingface.elser.HuggingFaceElserService;
8586
import org.elasticsearch.xpack.inference.services.mistral.MistralService;
8687
import org.elasticsearch.xpack.inference.services.openai.OpenAiService;
87-
import org.elasticsearch.xpack.inference.telemetry.InferenceAPMStats;
88-
import org.elasticsearch.xpack.inference.telemetry.StatsMap;
88+
import org.elasticsearch.xpack.inference.telemetry.ApmInferenceStats;
89+
import org.elasticsearch.xpack.inference.telemetry.InferenceStats;
8990

9091
import java.util.ArrayList;
9192
import java.util.Collection;
@@ -196,10 +197,10 @@ public Collection<?> createComponents(PluginServices services) {
196197
var actionFilter = new ShardBulkInferenceActionFilter(registry, modelRegistry);
197198
shardBulkInferenceActionFilter.set(actionFilter);
198199

199-
var statsFactory = new InferenceAPMStats.Factory(services.telemetryProvider().getMeterRegistry());
200-
var statsMap = new StatsMap<>(InferenceAPMStats::key, statsFactory::newInferenceRequestAPMCounter);
200+
var meterRegistry = services.telemetryProvider().getMeterRegistry();
201+
var stats = new PluginComponentBinding<>(InferenceStats.class, ApmInferenceStats.create(meterRegistry));
201202

202-
return List.of(modelRegistry, registry, httpClientManager, statsMap);
203+
return List.of(modelRegistry, registry, httpClientManager, stats);
203204
}
204205

205206
@Override

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportInferenceAction.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,22 +21,26 @@
2121
import org.elasticsearch.transport.TransportService;
2222
import org.elasticsearch.xpack.core.inference.action.InferenceAction;
2323
import org.elasticsearch.xpack.inference.registry.ModelRegistry;
24+
import org.elasticsearch.xpack.inference.telemetry.InferenceStats;
2425

2526
public class TransportInferenceAction extends HandledTransportAction<InferenceAction.Request, InferenceAction.Response> {
2627

2728
private final ModelRegistry modelRegistry;
2829
private final InferenceServiceRegistry serviceRegistry;
30+
private final InferenceStats inferenceStats;
2931

3032
@Inject
3133
public TransportInferenceAction(
3234
TransportService transportService,
3335
ActionFilters actionFilters,
3436
ModelRegistry modelRegistry,
35-
InferenceServiceRegistry serviceRegistry
37+
InferenceServiceRegistry serviceRegistry,
38+
InferenceStats inferenceStats
3639
) {
3740
super(InferenceAction.NAME, transportService, actionFilters, InferenceAction.Request::new, EsExecutors.DIRECT_EXECUTOR_SERVICE);
3841
this.modelRegistry = modelRegistry;
3942
this.serviceRegistry = serviceRegistry;
43+
this.inferenceStats = inferenceStats;
4044
}
4145

4246
@Override
@@ -76,6 +80,7 @@ protected void doExecute(Task task, InferenceAction.Request request, ActionListe
7680
unparsedModel.settings(),
7781
unparsedModel.secrets()
7882
);
83+
inferenceStats.incrementRequestCount(model);
7984
inferOnService(model, request, service.get(), delegate);
8085
});
8186

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/embeddings/CohereEmbeddingsModel.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ public static CohereEmbeddingsModel of(CohereEmbeddingsModel model, Map<String,
2828
}
2929

3030
public CohereEmbeddingsModel(
31-
String modelId,
31+
String inferenceId,
3232
TaskType taskType,
3333
String service,
3434
Map<String, Object> serviceSettings,
@@ -37,7 +37,7 @@ public CohereEmbeddingsModel(
3737
ConfigurationParseContext context
3838
) {
3939
this(
40-
modelId,
40+
inferenceId,
4141
taskType,
4242
service,
4343
CohereEmbeddingsServiceSettings.fromMap(serviceSettings, context),

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/embeddings/OpenAiEmbeddingsServiceSettings.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ public OpenAiEmbeddingsServiceSettings(
150150
@Nullable RateLimitSettings rateLimitSettings
151151
) {
152152
this.uri = uri;
153-
this.modelId = modelId;
153+
this.modelId = Objects.requireNonNull(modelId);
154154
this.organizationId = organizationId;
155155
this.similarity = similarity;
156156
this.dimensions = dimensions;
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.inference.telemetry;
9+
10+
import org.elasticsearch.inference.Model;
11+
import org.elasticsearch.telemetry.metric.LongCounter;
12+
import org.elasticsearch.telemetry.metric.MeterRegistry;
13+
14+
import java.util.HashMap;
15+
import java.util.Objects;
16+
17+
public class ApmInferenceStats implements InferenceStats {
18+
private final LongCounter inferenceAPMRequestCounter;
19+
20+
public ApmInferenceStats(LongCounter inferenceAPMRequestCounter) {
21+
this.inferenceAPMRequestCounter = Objects.requireNonNull(inferenceAPMRequestCounter);
22+
}
23+
24+
@Override
25+
public void incrementRequestCount(Model model) {
26+
var service = model.getConfigurations().getService();
27+
var taskType = model.getTaskType();
28+
var modelId = model.getServiceSettings().modelId();
29+
30+
var attributes = new HashMap<String, Object>(5);
31+
attributes.put("service", service);
32+
attributes.put("task_type", taskType.toString());
33+
if (modelId != null) {
34+
attributes.put("model_id", modelId);
35+
}
36+
37+
inferenceAPMRequestCounter.incrementBy(1, attributes);
38+
}
39+
40+
public static ApmInferenceStats create(MeterRegistry meterRegistry) {
41+
return new ApmInferenceStats(
42+
meterRegistry.registerLongCounter(
43+
"es.inference.requests.count.total",
44+
"Inference API request counts for a particular service, task type, model ID",
45+
"operations"
46+
)
47+
);
48+
}
49+
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/InferenceAPMStats.java

Lines changed: 0 additions & 47 deletions
This file was deleted.

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/InferenceStats.java

Lines changed: 7 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -8,52 +8,14 @@
88
package org.elasticsearch.xpack.inference.telemetry;
99

1010
import org.elasticsearch.inference.Model;
11-
import org.elasticsearch.inference.TaskType;
12-
import org.elasticsearch.xpack.core.inference.InferenceRequestStats;
1311

14-
import java.util.Objects;
15-
import java.util.concurrent.atomic.LongAdder;
12+
public interface InferenceStats {
1613

17-
public class InferenceStats implements Stats {
18-
protected final String service;
19-
protected final TaskType taskType;
20-
protected final String modelId;
21-
protected final LongAdder counter = new LongAdder();
14+
/**
15+
* Increment the counter for a particular value in a thread safe manner.
16+
* @param model the model to increment request count for
17+
*/
18+
void incrementRequestCount(Model model);
2219

23-
public static String key(Model model) {
24-
StringBuilder builder = new StringBuilder();
25-
builder.append(model.getConfigurations().getService());
26-
builder.append(":");
27-
builder.append(model.getTaskType());
28-
29-
if (model.getServiceSettings().modelId() != null) {
30-
builder.append(":");
31-
builder.append(model.getServiceSettings().modelId());
32-
}
33-
34-
return builder.toString();
35-
}
36-
37-
public InferenceStats(Model model) {
38-
Objects.requireNonNull(model);
39-
40-
service = model.getConfigurations().getService();
41-
taskType = model.getTaskType();
42-
modelId = model.getServiceSettings().modelId();
43-
}
44-
45-
@Override
46-
public void increment() {
47-
counter.increment();
48-
}
49-
50-
@Override
51-
public long getCount() {
52-
return counter.sum();
53-
}
54-
55-
@Override
56-
public InferenceRequestStats toSerializableForm() {
57-
return new InferenceRequestStats(service, taskType, modelId, getCount());
58-
}
20+
InferenceStats NOOP = model -> {};
5921
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/Stats.java

Lines changed: 0 additions & 30 deletions
This file was deleted.

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/telemetry/StatsMap.java

Lines changed: 0 additions & 57 deletions
This file was deleted.

0 commit comments

Comments
 (0)