From f52f5e2e13ce69707af5c00a55ad5ec200d83798 Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Wed, 7 Jan 2026 20:01:06 +0100 Subject: [PATCH 01/48] Add Mixedbread AI Rerank support --- .../InferenceNamedWriteablesProvider.java | 16 + .../xpack/inference/InferencePlugin.java | 2 + .../mixedbread/MixedbreadAccount.java | 40 +++ .../mixedbread/MixedbreadConstants.java | 29 ++ .../services/mixedbread/MixedbreadModel.java | 82 +++++ .../MixedbreadRateLimitServiceSettings.java | 18 + .../mixedbread/MixedbreadResponseHandler.java | 68 ++++ .../mixedbread/MixedbreadService.java | 309 ++++++++++++++++++ .../action/MixedbreadActionCreator.java | 88 +++++ .../action/MixedbreadActionVisitor.java | 30 ++ .../mixedbread/request/MixedbreadRequest.java | 116 +++++++ .../request/MixedbreadRerankRequest.java | 78 +++++ .../MixedbreadRerankRequestEntity.java | 63 ++++ .../rerank/MixedbreadRerankModel.java | 91 ++++++ .../MixedbreadRerankRequestTaskSettings.java | 48 +++ .../MixedbreadRerankResponseHandler.java | 24 ++ .../MixedbreadRerankServiceSettings.java | 152 +++++++++ .../rerank/MixedbreadRerankTaskSettings.java | 160 +++++++++ .../response/MixedbreadErrorResponse.java | 29 ++ .../MixedbreadRerankResponseEntity.java | 142 ++++++++ 20 files changed, 1585 insertions(+) create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadAccount.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadResponseHandler.java create mode 100644 
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreator.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionVisitor.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRequest.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequest.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntity.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankRequestTaskSettings.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankResponseHandler.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadErrorResponse.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java index b5f2bc89e0ec8..e177578a5dc66 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java @@ -114,6 +114,8 @@ import org.elasticsearch.xpack.inference.services.llama.embeddings.LlamaEmbeddingsServiceSettings; import org.elasticsearch.xpack.inference.services.mistral.completion.MistralChatCompletionServiceSettings; import org.elasticsearch.xpack.inference.services.mistral.embeddings.MistralEmbeddingsServiceSettings; +import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankServiceSettings; +import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettings; import org.elasticsearch.xpack.inference.services.nvidia.completion.NvidiaChatCompletionServiceSettings; import org.elasticsearch.xpack.inference.services.nvidia.embeddings.NvidiaEmbeddingsServiceSettings; import org.elasticsearch.xpack.inference.services.nvidia.embeddings.NvidiaEmbeddingsTaskSettings; @@ -186,6 +188,7 @@ public static List getNamedWriteables() { addAi21NamedWriteables(namedWriteables); addOpenShiftAiNamedWriteables(namedWriteables); addNvidiaNamedWriteables(namedWriteables); + addMixedbreadNamedWriteables(namedWriteables); addUnifiedNamedWriteables(namedWriteables); @@ -934,4 +937,17 @@ private static void addElasticNamedWriteables(List ) ); } + + private static void addMixedbreadNamedWriteables(List namedWriteables) { + namedWriteables.add( + new NamedWriteableRegistry.Entry( + ServiceSettings.class, + MixedbreadRerankServiceSettings.NAME, + MixedbreadRerankServiceSettings::new + ) + ); + namedWriteables.add( + new 
NamedWriteableRegistry.Entry(TaskSettings.class, MixedbreadRerankTaskSettings.NAME, MixedbreadRerankTaskSettings::new) + ); + } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java index 029731921d1da..9560e2b69f4e5 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java @@ -174,6 +174,7 @@ import org.elasticsearch.xpack.inference.services.jinaai.JinaAIService; import org.elasticsearch.xpack.inference.services.llama.LlamaService; import org.elasticsearch.xpack.inference.services.mistral.MistralService; +import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadService; import org.elasticsearch.xpack.inference.services.nvidia.NvidiaService; import org.elasticsearch.xpack.inference.services.openai.OpenAiService; import org.elasticsearch.xpack.inference.services.openshiftai.OpenShiftAiService; @@ -580,6 +581,7 @@ public List getInferenceServiceFactories() { context -> new GoogleAiStudioService(httpFactory.get(), serviceComponents.get(), context), context -> new GoogleVertexAiService(httpFactory.get(), serviceComponents.get(), context), context -> new MistralService(httpFactory.get(), serviceComponents.get(), context), + context -> new MixedbreadService(httpFactory.get(), serviceComponents.get(), context), context -> new AnthropicService(httpFactory.get(), serviceComponents.get(), context), context -> new AmazonBedrockService(httpFactory.get(), amazonBedrockFactory.get(), serviceComponents.get(), context), context -> new AlibabaCloudSearchService(httpFactory.get(), serviceComponents.get(), context), diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadAccount.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadAccount.java new file mode 100644 index 0000000000000..dc795efa80b7b --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadAccount.java @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.mixedbread; + +import org.apache.http.client.utils.URIBuilder; +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.settings.SecureString; +import org.elasticsearch.rest.RestStatus; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Objects; + +public record MixedbreadAccount(URI baseUri, SecureString apiKey) { + + public static MixedbreadAccount of(MixedbreadModel model) { + try { + var uri = model.baseUri() != null ? 
model.baseUri() : new URIBuilder().setScheme("https").setHost("api.mixedbread.com").build(); + return new MixedbreadAccount(uri, model.apiKey()); + } catch (URISyntaxException e) { + // only building the default URL can throw here; using bad request so that potentially sensitive URL information does not get logged + throw new ElasticsearchStatusException( + Strings.format("Failed to construct %s URL", MixedbreadService.NAME), + RestStatus.BAD_REQUEST, + e + ); + } + } + + public MixedbreadAccount { + Objects.requireNonNull(baseUri); + Objects.requireNonNull(apiKey); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java new file mode 100644 index 0000000000000..033f800fc8d6b --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java @@ -0,0 +1,29 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.mixedbread; + +public class MixedbreadConstants { + public static final String RERANK_URI_PATH = "/v1/rerank"; + + // common service settings fields + public static final String API_KEY_FIELD = "api_key"; + + public static final String MODEL_FIELD = "model"; + + // request field; presumably the rerank input documents, as this change adds no embeddings task (TODO confirm) + public static final String INPUT_FIELD = "input"; + + // presumably also a rerank request field rather than a task setting (TODO confirm) + public static final String QUERY_FIELD = "query"; + + // rerank task settings fields + public static final String RETURN_DOCUMENTS_FIELD = "return_documents"; + public static final String TOP_K_FIELD = "top_k"; + + private MixedbreadConstants() {} +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java new file mode 100644 index 0000000000000..0409dd39b45d7 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java @@ -0,0 +1,82 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.mixedbread; + +import org.elasticsearch.common.settings.SecureString; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.ModelSecrets; +import org.elasticsearch.inference.ServiceSettings; +import org.elasticsearch.inference.TaskSettings; +import org.elasticsearch.xpack.inference.external.action.ExecutableAction; +import org.elasticsearch.xpack.inference.services.RateLimitGroupingModel; +import org.elasticsearch.xpack.inference.services.ServiceUtils; +import org.elasticsearch.xpack.inference.services.mixedbread.action.MixedbreadActionVisitor; +import org.elasticsearch.xpack.inference.services.settings.ApiKeySecrets; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; + +import java.net.URI; +import java.util.Map; +import java.util.Objects; + +/** + * Abstract base class for Mixedbread models. It extends RateLimitGroupingModel and holds the API key and rate limit service settings. + * The rate limit grouping hash is the API key's hash code; the base URI and rate limit settings come from the service settings. 
+ */ +public abstract class MixedbreadModel extends RateLimitGroupingModel { + private final SecureString apiKey; + private final MixedbreadRateLimitServiceSettings rateLimitServiceSettings; + + public MixedbreadModel( + ModelConfigurations configurations, + ModelSecrets secrets, + @Nullable ApiKeySecrets apiKeySecrets, + MixedbreadRateLimitServiceSettings rateLimitServiceSettings + ) { + super(configurations, secrets); + + this.rateLimitServiceSettings = Objects.requireNonNull(rateLimitServiceSettings); + apiKey = ServiceUtils.apiKey(apiKeySecrets); + } + + protected MixedbreadModel(MixedbreadModel model, TaskSettings taskSettings) { + super(model, taskSettings); + + rateLimitServiceSettings = model.rateLimitServiceSettings(); + apiKey = model.apiKey(); + } + + protected MixedbreadModel(MixedbreadModel model, ServiceSettings serviceSettings) { + super(model, serviceSettings); + + rateLimitServiceSettings = model.rateLimitServiceSettings(); + apiKey = model.apiKey(); + } + + public SecureString apiKey() { + return apiKey; + } + + public MixedbreadRateLimitServiceSettings rateLimitServiceSettings() { + return rateLimitServiceSettings; + } + + public abstract ExecutableAction accept(MixedbreadActionVisitor creator, Map taskSettings); + + public RateLimitSettings rateLimitSettings() { + return rateLimitServiceSettings.rateLimitSettings(); + } + + public int rateLimitGroupingHash() { + return apiKey().hashCode(); + } + + public URI baseUri() { + return rateLimitServiceSettings.uri(); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java new file mode 100644 index 0000000000000..6bbd42dea100b --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java 
@@ -0,0 +1,18 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.mixedbread;
+
+import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
+
+import java.net.URI;
+
+/**
+ * Service settings that expose what {@link MixedbreadModel} needs for rate limiting and request routing.
+ */
+public interface MixedbreadRateLimitServiceSettings {
+
+    /**
+     * @return the URI that {@link MixedbreadModel#baseUri()} hands out
+     */
+    URI uri();
+
+    /**
+     * @return the rate limit settings that {@link MixedbreadModel#rateLimitSettings()} hands out
+     */
+    RateLimitSettings rateLimitSettings();
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadResponseHandler.java
new file mode 100644
index 0000000000000..b22205050d1ac
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadResponseHandler.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.mixedbread;
+
+import org.elasticsearch.inference.InferenceServiceResults;
+import org.elasticsearch.xpack.inference.external.http.HttpResult;
+import org.elasticsearch.xpack.inference.external.http.retry.BaseResponseHandler;
+import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser;
+import org.elasticsearch.xpack.inference.external.http.retry.RetryException;
+import org.elasticsearch.xpack.inference.external.request.Request;
+import org.elasticsearch.xpack.inference.services.cohere.response.CohereErrorResponseEntity;
+
+import java.util.concurrent.Flow;
+
+/**
+ * Response handler for Mixedbread requests. Error bodies are parsed with {@link CohereErrorResponseEntity}.
+ * <p>
+ * NOTE(review): the error parser and the "texts array too large" matcher are Cohere-specific; confirm they match
+ * the error payloads Mixedbread actually returns.
+ */
+public class MixedbreadResponseHandler extends BaseResponseHandler {
+    static final String TEXTS_ARRAY_TOO_LARGE_MESSAGE_MATCHER = "invalid request: total number of texts must be at most";
+    static final String TEXTS_ARRAY_ERROR_MESSAGE = "Received a texts array too large response";
+
+    /**
+     * @param requestType description of the request kind, passed to the parent handler
+     * @param parseFunction parser applied to successful responses
+     * @param canHandleStreamingResponse whether this handler accepts streaming responses
+     */
+    public MixedbreadResponseHandler(String requestType, ResponseParser parseFunction, boolean canHandleStreamingResponse) {
+        super(requestType, parseFunction, CohereErrorResponseEntity::fromResponse, canHandleStreamingResponse);
+    }
+
+    /**
+     * Translates an unsuccessful HTTP status into a {@link RetryException}; returns normally for a successful response.
+     * The exception's first argument (presumably the "should retry" flag) is {@code true} for 500 and 429 and
+     * {@code false} for every other failure.
+     *
+     * @param request the request that produced the response
+     * @param result the HTTP result to inspect
+     * @throws RetryException for every unsuccessful response
+     */
+    @Override
+    protected void checkForFailureStatusCode(Request request, HttpResult result) throws RetryException {
+        if (result.isSuccessfulResponse()) {
+            return;
+        }
+
+        // handle error codes
+        int statusCode = result.response().getStatusLine().getStatusCode();
+        if (statusCode == 500) {
+            throw new RetryException(true, buildError(SERVER_ERROR, request, result));
+        } else if (statusCode > 500) {
+            throw new RetryException(false, buildError(SERVER_ERROR, request, result));
+        } else if (statusCode == 429) {
+            throw new RetryException(true, buildError(RATE_LIMIT, request, result));
+        } else if (isTextsArrayTooLarge(result)) {
+            // must be checked before the generic fall-through so the 400 gets its dedicated message
+            throw new RetryException(false, buildError(TEXTS_ARRAY_ERROR_MESSAGE, request, result));
+        } else if (statusCode == 401) {
+            throw new RetryException(false, buildError(AUTHENTICATION, request, result));
+        } else if (statusCode >= 300 && statusCode < 400) {
+            throw new RetryException(false, buildError(REDIRECTION, request, result));
+        } else {
+            throw new RetryException(false, buildError(UNSUCCESSFUL, request, result));
+        }
+    }
+
+    /**
+     * Delegates to the parent implementation unchanged.
+     */
+    @Override
+    public InferenceServiceResults parseResult(Request request, Flow.Publisher<HttpResult> flow) {
+        return super.parseResult(request, flow);
+    }
+
+    /**
+     * @return {@code true} when the response is a 400 whose parsed error message contains
+     *         {@link #TEXTS_ARRAY_TOO_LARGE_MESSAGE_MATCHER}
+     */
+    private static boolean isTextsArrayTooLarge(HttpResult result) {
+        int statusCode = result.response().getStatusLine().getStatusCode();
+
+        if (statusCode == 400) {
+            var errorEntity = CohereErrorResponseEntity.fromResponse(result);
+            return errorEntity != null && errorEntity.getErrorMessage().contains(TEXTS_ARRAY_TOO_LARGE_MESSAGE_MATCHER);
+        }
+
+        return false;
+    }
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
new file mode 100644
index 0000000000000..a2ef58581efdd
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
@@ -0,0 +1,309 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.mixedbread;
+
+import org.elasticsearch.TransportVersion;
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.common.ValidationException;
+import org.elasticsearch.common.util.LazyInitializable;
+import org.elasticsearch.core.Nullable;
+import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.inference.ChunkInferenceInput;
+import org.elasticsearch.inference.ChunkedInference;
+import org.elasticsearch.inference.ChunkingSettings;
+import org.elasticsearch.inference.InferenceServiceConfiguration;
+import org.elasticsearch.inference.InferenceServiceExtension;
+import org.elasticsearch.inference.InferenceServiceResults;
+import org.elasticsearch.inference.InputType;
+import org.elasticsearch.inference.Model;
+import org.elasticsearch.inference.ModelConfigurations;
+import org.elasticsearch.inference.ModelSecrets;
+import org.elasticsearch.inference.RerankingInferenceService;
+import org.elasticsearch.inference.SettingsConfiguration;
+import org.elasticsearch.inference.TaskType;
+import org.elasticsearch.inference.configuration.SettingsConfigurationFieldType;
+import org.elasticsearch.xpack.core.inference.chunking.ChunkingSettingsBuilder;
+import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender;
+import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs;
+import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;
+import org.elasticsearch.xpack.inference.services.ConfigurationParseContext;
+import org.elasticsearch.xpack.inference.services.SenderService;
+import org.elasticsearch.xpack.inference.services.ServiceComponents;
+import org.elasticsearch.xpack.inference.services.ServiceUtils;
+import org.elasticsearch.xpack.inference.services.mixedbread.action.MixedbreadActionCreator;
+import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModel;
+import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings;
+import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
+
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import static org.elasticsearch.xpack.inference.services.ServiceFields.MODEL_ID;
+import static org.elasticsearch.xpack.inference.services.ServiceUtils.createInvalidModelException;
+import static org.elasticsearch.xpack.inference.services.ServiceUtils.createInvalidTaskTypeException;
+import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMap;
+import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrDefaultEmpty;
+import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull;
+import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap;
+import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwUnsupportedUnifiedCompletionOperation;
+
+/**
+ * Inference service for Mixedbread. Only the {@link TaskType#RERANK} task type is supported.
+ */
+public class MixedbreadService extends SenderService implements RerankingInferenceService {
+    public static final String NAME = "mixedbread";
+
+    private static final String SERVICE_NAME = "Mixedbread";
+    private static final EnumSet<TaskType> supportedTaskTypes = EnumSet.of(TaskType.RERANK);
+
+    public static final EnumSet<InputType> VALID_INPUT_TYPE_VALUES = EnumSet.of(
+        InputType.INGEST,
+        InputType.SEARCH,
+        InputType.CLASSIFICATION,
+        InputType.CLUSTERING,
+        InputType.INTERNAL_INGEST,
+        InputType.INTERNAL_SEARCH
+    );
+
+    // TODO Batching - We'll instantiate a batching class within the services that want to support it and pass it through to
+    // the request managers via the MixedbreadActionCreator class
+    // The reason it needs to be done here is that the batching logic needs to hold state but the *RequestManagers are instantiated
+    // on every request
+
+    public MixedbreadService(
+        HttpRequestSender.Factory factory,
+        ServiceComponents serviceComponents,
+        InferenceServiceExtension.InferenceServiceFactoryContext context
+    ) {
+        this(factory, serviceComponents, context.clusterService());
+    }
+
+    public MixedbreadService(HttpRequestSender.Factory factory, ServiceComponents serviceComponents, ClusterService clusterService) {
+        super(factory, serviceComponents, clusterService);
+    }
+
+    @Override
+    public String name() {
+        return NAME;
+    }
+
+    /**
+     * Builds a model from a create-endpoint request and reports it, or any failure, to the listener.
+     * Entries left in {@code config}, the service settings or the task settings after parsing are rejected.
+     *
+     * @param inferenceEntityId id of the inference endpoint being created
+     * @param taskType requested task type
+     * @param config request body; the service and task settings are removed from it while parsing
+     * @param parsedModelListener receives the parsed model or the parsing failure
+     */
+    @Override
+    public void parseRequestConfig(
+        String inferenceEntityId,
+        TaskType taskType,
+        Map<String, Object> config,
+        ActionListener<Model> parsedModelListener
+    ) {
+        try {
+            Map<String, Object> serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS);
+            Map<String, Object> taskSettingsMap = removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS);
+
+            ChunkingSettings chunkingSettings = null;
+
+            // In a request the secrets are read from the service settings map, so it is passed in both positions.
+            MixedbreadModel model = createModel(
+                inferenceEntityId,
+                taskType,
+                serviceSettingsMap,
+                taskSettingsMap,
+                chunkingSettings,
+                serviceSettingsMap,
+                ConfigurationParseContext.REQUEST
+            );
+
+            // Anything the parsers did not consume is an unknown setting and must not be silently accepted.
+            throwIfNotEmptyMap(config, NAME);
+            throwIfNotEmptyMap(serviceSettingsMap, NAME);
+            throwIfNotEmptyMap(taskSettingsMap, NAME);
+
+            parsedModelListener.onResponse(model);
+        } catch (Exception e) {
+            parsedModelListener.onFailure(e);
+        }
+    }
+
+    /**
+     * Creates a model using {@link ConfigurationParseContext#PERSISTENT}, i.e. for configuration read back from storage.
+     */
+    private static MixedbreadModel createModelWithoutLoggingDeprecations(
+        String inferenceEntityId,
+        TaskType taskType,
+        Map<String, Object> serviceSettings,
+        Map<String, Object> taskSettings,
+        ChunkingSettings chunkingSettings,
+        @Nullable Map<String, Object> secretSettings
+    ) {
+        return createModel(
+            inferenceEntityId,
+            taskType,
+            serviceSettings,
+            taskSettings,
+            chunkingSettings,
+            secretSettings,
+            ConfigurationParseContext.PERSISTENT
+        );
+    }
+
+    /**
+     * Creates the model for {@code taskType}. Only {@link TaskType#RERANK} is handled; any other task type raises the
+     * exception built by {@code createInvalidTaskTypeException}. {@code chunkingSettings} is currently not used.
+     */
+    private static MixedbreadModel createModel(
+        String inferenceEntityId,
+        TaskType taskType,
+        Map<String, Object> serviceSettings,
+        Map<String, Object> taskSettings,
+        ChunkingSettings chunkingSettings,
+        @Nullable Map<String, Object> secretSettings,
+        ConfigurationParseContext context
+    ) {
+        return switch (taskType) {
+            case RERANK -> new MixedbreadRerankModel(inferenceEntityId, serviceSettings, taskSettings, secretSettings, context);
+            default -> throw createInvalidTaskTypeException(inferenceEntityId, NAME, taskType, context);
+        };
+    }
+
+    @Override
+    public MixedbreadModel parsePersistedConfigWithSecrets(
+        String inferenceEntityId,
+        TaskType taskType,
+        Map<String, Object> config,
+        Map<String, Object> secrets
+    ) {
+        Map<String, Object> serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS);
+        Map<String, Object> taskSettingsMap = removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS);
+        Map<String, Object> secretSettingsMap = removeFromMapOrThrowIfNull(secrets, ModelSecrets.SECRET_SETTINGS);
+
+        ChunkingSettings chunkingSettings = null;
+
+        return createModelWithoutLoggingDeprecations(
+            inferenceEntityId,
+            taskType,
+            serviceSettingsMap,
+            taskSettingsMap,
+            chunkingSettings,
+            secretSettingsMap
+        );
+    }
+
+    @Override
+    public MixedbreadModel parsePersistedConfig(String inferenceEntityId, TaskType taskType, Map<String, Object> config) {
+        Map<String, Object> serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS);
+        Map<String, Object> taskSettingsMap = removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS);
+
+        // NOTE(review): createModel rejects TEXT_EMBEDDING, so this branch only matters once embeddings are supported.
+        ChunkingSettings chunkingSettings = null;
+        if (TaskType.TEXT_EMBEDDING.equals(taskType)) {
+            chunkingSettings = ChunkingSettingsBuilder.fromMap(removeFromMap(config, ModelConfigurations.CHUNKING_SETTINGS));
+        }
+
+        return createModelWithoutLoggingDeprecations(
+            inferenceEntityId,
+            taskType,
+            serviceSettingsMap,
+            taskSettingsMap,
+            chunkingSettings,
+            null
+        );
+    }
+
+    /**
+     * @return this service's own configuration, as built by {@link Configuration}
+     */
+    @Override
+    public InferenceServiceConfiguration getConfiguration() {
+        return Configuration.get();
+    }
+
+    @Override
+    public EnumSet<TaskType> supportedTaskTypes() {
+        return supportedTaskTypes;
+    }
+
+    @Override
+    protected void doUnifiedCompletionInfer(
+        Model model,
+        UnifiedChatInput inputs,
+        TimeValue timeout,
+        ActionListener<InferenceServiceResults> listener
+    ) {
+        throwUnsupportedUnifiedCompletionOperation(NAME);
+    }
+
+    /**
+     * Chunked inference is not available for the task types this service supports, so the listener is failed
+     * immediately instead of being left without a response.
+     */
+    @Override
+    protected void doChunkedInfer(
+        Model model,
+        List<ChunkInferenceInput> inputs,
+        Map<String, Object> taskSettings,
+        InputType inputType,
+        TimeValue timeout,
+        ActionListener<List<ChunkedInference>> listener
+    ) {
+        listener.onFailure(new UnsupportedOperationException("The [" + NAME + "] service does not support chunked inference"));
+    }
+
+    /**
+     * Executes an inference request. Fails the listener when {@code model} is not a {@link MixedbreadModel}.
+     */
+    @Override
+    public void doInfer(
+        Model model,
+        InferenceInputs inputs,
+        Map<String, Object> taskSettings,
+        TimeValue timeout,
+        ActionListener<InferenceServiceResults> listener
+    ) {
+        if (model instanceof MixedbreadModel == false) {
+            listener.onFailure(createInvalidModelException(model));
+            return;
+        }
+
+        MixedbreadModel mixedbreadModel = (MixedbreadModel) model;
+        var actionCreator = new MixedbreadActionCreator(getSender(), getServiceComponents());
+
+        var action = mixedbreadModel.accept(actionCreator, taskSettings);
+        action.execute(inputs, timeout, listener);
+    }
+
+    @Override
+    protected void validateInputType(InputType inputType, Model model, ValidationException validationException) {
+        ServiceUtils.validateInputTypeAgainstAllowlist(inputType, VALID_INPUT_TYPE_VALUES, SERVICE_NAME, validationException);
+    }
+
+    @Override
+    public TransportVersion getMinimalSupportedVersion() {
+        return TransportVersion.minimumCompatible();
+    }
+
+    /**
+     * @return an empty set: the only supported task type is rerank, which is not streamed
+     */
+    @Override
+    public Set<TaskType> supportedStreamingTasks() {
+        return Set.of();
+    }
+
+    @Override
+    public int rerankerWindowSize(String modelId) {
+        // NOTE(review): this estimate was derived for Cohere rerank models, which truncate at 4096 tokens
+        // (https://docs.cohere.com/reference/rerank); confirm the limit for the Mixedbread models.
+        // Using 1 token = 0.75 words as a rough estimate, we get 3072 words
+        // allowing for some headroom, we set the window size below 3072
+        return 2800;
+    }
+
+    /**
+     * Lazily built description of the settings this service accepts.
+     */
+    public static class Configuration {
+        public static InferenceServiceConfiguration get() {
+            return configuration.getOrCompute();
+        }
+
+        private static final LazyInitializable<InferenceServiceConfiguration, RuntimeException> configuration = new LazyInitializable<>(
+            () -> {
+                var configurationMap = new HashMap<String, SettingsConfiguration>();
+
+                configurationMap.put(
+                    MODEL_ID,
+                    new SettingsConfiguration.Builder(supportedTaskTypes).setDescription(
+                        "The name of the model to use for the inference task."
+                    )
+                        .setLabel("Model ID")
+                        .setRequired(true)
+                        .setSensitive(false)
+                        .setUpdatable(false)
+                        .setType(SettingsConfigurationFieldType.STRING)
+                        .build()
+                );
+
+                configurationMap.putAll(DefaultSecretSettings.toSettingsConfiguration(supportedTaskTypes));
+                configurationMap.putAll(RateLimitSettings.toSettingsConfiguration(supportedTaskTypes));
+
+                return new InferenceServiceConfiguration.Builder().setService(NAME)
+                    .setName(SERVICE_NAME)
+                    .setTaskTypes(supportedTaskTypes)
+                    .setConfigurations(configurationMap)
+                    .build();
+            }
+        );
+    }
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreator.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreator.java
new file mode 100644
index 0000000000000..df07599bd1572
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreator.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.mixedbread.action;
+
+import org.elasticsearch.inference.TaskType;
+import org.elasticsearch.xpack.inference.external.action.ExecutableAction;
+import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction;
+import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;
+import org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager;
+import org.elasticsearch.xpack.inference.external.http.sender.QueryAndDocsInputs;
+import org.elasticsearch.xpack.inference.external.http.sender.Sender;
+import org.elasticsearch.xpack.inference.services.ServiceComponents;
+import org.elasticsearch.xpack.inference.services.mixedbread.request.MixedbreadRerankRequest;
+import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModel;
+import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankResponseHandler;
+import org.elasticsearch.xpack.inference.services.mixedbread.response.MixedbreadRerankResponseEntity;
+
+import java.util.Map;
+import java.util.Objects;
+
+import static org.elasticsearch.core.Strings.format;
+
+/**
+ * Creates the {@link ExecutableAction}s that send requests for Mixedbread models.
+ */
+public class MixedbreadActionCreator implements MixedbreadActionVisitor {
+    private static final String FAILED_TO_SEND_REQUEST_ERROR_MESSAGE = "Failed to send Mixedbread %s request from inference entity id [%s]";
+    private static final String INVALID_REQUEST_TYPE_MESSAGE = "Invalid request type: expected Mixedbread %s request but got %s";
+
+    // Parses rerank responses; refuses to parse when the originating request is not a MixedbreadRerankRequest.
+    private static final ResponseHandler RERANK_HANDLER = new MixedbreadRerankResponseHandler("mixedbread rerank", (request, response) -> {
+        if ((request instanceof MixedbreadRerankRequest) == false) {
+            var errorMessage = format(
+                INVALID_REQUEST_TYPE_MESSAGE,
+                "RERANK",
+                request != null ? request.getClass().getSimpleName() : "null"
+            );
+            throw new IllegalArgumentException(errorMessage);
+        }
+        return MixedbreadRerankResponseEntity.fromResponse(response);
+    });
+
+    private final Sender sender;
+    private final ServiceComponents serviceComponents;
+
+    /**
+     * Constructs a new MixedbreadActionCreator with the specified sender and service components.
+     *
+     * @param sender the sender to use for executing actions
+     * @param serviceComponents the service components providing necessary services
+     */
+    public MixedbreadActionCreator(Sender sender, ServiceComponents serviceComponents) {
+        this.sender = Objects.requireNonNull(sender);
+        this.serviceComponents = Objects.requireNonNull(serviceComponents);
+    }
+
+    /**
+     * Creates the rerank action for {@code model}, with {@code taskSettings} merged over the model's own task settings.
+     *
+     * @param model the rerank model to send requests for
+     * @param taskSettings task settings supplied with the request
+     * @return an action that sends the rerank request through the sender
+     */
+    @Override
+    public ExecutableAction create(MixedbreadRerankModel model, Map<String, Object> taskSettings) {
+        var overriddenModel = MixedbreadRerankModel.of(model, taskSettings);
+        var manager = new GenericRequestManager<>(
+            serviceComponents.threadPool(),
+            overriddenModel,
+            RERANK_HANDLER,
+            // The request must be built from the merged model; using the original would drop the request-level overrides.
+            inputs -> new MixedbreadRerankRequest(
+                inputs.getQuery(),
+                inputs.getChunks(),
+                inputs.getReturnDocuments(),
+                inputs.getTopN(),
+                overriddenModel
+            ),
+            QueryAndDocsInputs.class
+        );
+        var errorMessage = buildErrorMessage(TaskType.RERANK, model.getInferenceEntityId());
+        return new SenderExecutableAction(sender, manager, errorMessage);
+    }
+
+    /**
+     * Builds an error message for failed requests.
+     *
+     * @param requestType the type of request that failed
+     * @param inferenceId the inference entity ID associated with the request
+     * @return a formatted error message
+     */
+    public static String buildErrorMessage(TaskType requestType, String inferenceId) {
+        return format(FAILED_TO_SEND_REQUEST_ERROR_MESSAGE, requestType.toString(), inferenceId);
+    }
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionVisitor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionVisitor.java
new file mode 100644
index 0000000000000..9b2604d5d6176
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionVisitor.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.mixedbread.action;
+
+import org.elasticsearch.xpack.inference.external.action.ExecutableAction;
+import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModel;
+
+import java.util.Map;
+
+/**
+ * Interface for creating {@link ExecutableAction} instances for Mixedbread models.
+ * <p>
+ * This interface is used to create {@link ExecutableAction} instances for Mixedbread models
+ * {@link MixedbreadRerankModel}.
+ */
+public interface MixedbreadActionVisitor {
+
+    /**
+     * Creates an {@link ExecutableAction} for the given {@link MixedbreadRerankModel}.
+     *
+     * @param model The model to create the action for.
+     * @param taskSettings The task settings supplied with the request.
+     * @return An {@link ExecutableAction} for the given model.
+     */
+    ExecutableAction create(MixedbreadRerankModel model, Map<String, Object> taskSettings);
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRequest.java
new file mode 100644
index 0000000000000..f2a9bc70e0402
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRequest.java
@@ -0,0 +1,116 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.mixedbread.request;
+
+import org.apache.http.HttpHeaders;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.client.utils.URIBuilder;
+import org.apache.http.entity.ByteArrayEntity;
+import org.elasticsearch.ElasticsearchStatusException;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.core.Nullable;
+import org.elasticsearch.rest.RestStatus;
+import org.elasticsearch.xcontent.ToXContentObject;
+import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xpack.inference.external.request.HttpRequest;
+import org.elasticsearch.xpack.inference.external.request.Request;
+import org.elasticsearch.xpack.inference.services.cohere.request.CohereUtils;
+import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadAccount;
+import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadService;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.Objects;
+
+import static org.elasticsearch.xpack.inference.external.request.RequestUtils.createAuthBearerHeader;
+
+/**
+ * Base class for Mixedbread HTTP requests. The request body is this object's own XContent rendering and the
+ * target URI is the account's base URI combined with {@link #pathSegments()}.
+ */
+public abstract class MixedbreadRequest implements Request, ToXContentObject {
+
+    /**
+     * Sets the JSON content type header, the bearer authorization header built from the account's API key, and the
+     * header returned by {@link CohereUtils#createRequestSourceHeader()}.
+     * <p>
+     * NOTE(review): the request source header comes from the Cohere integration; confirm it is wanted for Mixedbread.
+     *
+     * @param request the request to decorate
+     * @param account supplies the API key
+     */
+    public static void decorateWithAuthHeader(HttpPost request, MixedbreadAccount account) {
+        request.setHeader(HttpHeaders.CONTENT_TYPE, XContentType.JSON.mediaType());
+        request.setHeader(createAuthBearerHeader(account.apiKey()));
+        request.setHeader(CohereUtils.createRequestSourceHeader());
+    }
+
+    protected final MixedbreadAccount account;
+    private final String inferenceEntityId;
+    private final String modelId;
+    private final boolean stream;
+
+    /**
+     * @param account supplies the base URI and API key
+     * @param inferenceEntityId id of the inference endpoint issuing the request; must not be {@code null}
+     * @param modelId the model to request; may be {@code null}
+     * @param stream the value reported by {@link #isStreaming()}
+     */
+    protected MixedbreadRequest(MixedbreadAccount account, String inferenceEntityId, @Nullable String modelId, boolean stream) {
+        this.account = account;
+        this.inferenceEntityId = Objects.requireNonNull(inferenceEntityId);
+        this.modelId = modelId; // model is optional in the v1 api
+        this.stream = stream;
+    }
+
+    /**
+     * Builds the HTTP POST: the UTF-8 encoded JSON rendering of this request as the body, plus the headers added by
+     * {@link #decorateWithAuthHeader}.
+     */
+    @Override
+    public HttpRequest createHttpRequest() {
+        HttpPost httpPost = new HttpPost(getURI());
+
+        ByteArrayEntity byteEntity = new ByteArrayEntity(Strings.toString(this).getBytes(StandardCharsets.UTF_8));
+        httpPost.setEntity(byteEntity);
+
+        decorateWithAuthHeader(httpPost, account);
+
+        return new HttpRequest(httpPost, getInferenceEntityId());
+    }
+
+    @Override
+    public String getInferenceEntityId() {
+        return inferenceEntityId;
+    }
+
+    @Override
+    public boolean isStreaming() {
+        return stream;
+    }
+
+    @Override
+    public URI getURI() {
+        return buildUri(account.baseUri());
+    }
+
+    /**
+     * Returns the URL path segments.
+     * @return List of segments that make up the path of the request.
+     */
+    protected abstract List<String> pathSegments();
+
+    /**
+     * Replaces the path of {@code baseUri} with {@link #pathSegments()}.
+     *
+     * @throws ElasticsearchStatusException with status 400 when the resulting URI is not valid
+     */
+    private URI buildUri(URI baseUri) {
+        try {
+            return new URIBuilder(baseUri).setPathSegments(pathSegments()).build();
+        } catch (URISyntaxException e) {
+            throw new ElasticsearchStatusException(
+                Strings.format("Failed to construct %s URL", MixedbreadService.NAME),
+                RestStatus.BAD_REQUEST,
+                e
+            );
+        }
+    }
+
+    public String getModelId() {
+        return modelId;
+    }
+
+    @Override
+    public Request truncate() {
+        // no truncation
+        return this;
+    }
+
+    @Override
+    public boolean[] getTruncationInfo() {
+        // no truncation
+        return null;
+    }
+}
+
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequest.java
new file mode 100644
index 0000000000000..b4d14853d3d4e
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequest.java
@@ -0,0 +1,78 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V.
under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.mixedbread.request;
+
+import org.elasticsearch.core.Nullable;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xpack.inference.services.cohere.request.CohereUtils;
+import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadAccount;
+import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModel;
+import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettings;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A rerank request to Mixedbread. The JSON body is produced by {@link MixedbreadRerankRequestEntity}.
+ */
+public class MixedbreadRerankRequest extends MixedbreadRequest {
+    // Last path segment of the Mixedbread reranking endpoint (POST /v1/reranking).
+    private static final String RERANKING_PATH = "reranking";
+
+    private final String query;
+    private final List<String> input;
+    private final Boolean returnDocuments;
+    private final Integer topN;
+    private final MixedbreadRerankTaskSettings taskSettings;
+
+    /**
+     * @param query the query to rank the inputs against; must not be {@code null}
+     * @param input the texts to rerank; must not be {@code null}
+     * @param returnDocuments request-level override of the task setting, or {@code null} to use the task setting
+     * @param topN request-level override of the task setting, or {@code null} to use the task setting
+     * @param model supplies the account, inference entity id, model id and task settings
+     */
+    public MixedbreadRerankRequest(
+        String query,
+        List<String> input,
+        @Nullable Boolean returnDocuments,
+        @Nullable Integer topN,
+        MixedbreadRerankModel model
+    ) {
+        super(MixedbreadAccount.of(model), model.getInferenceEntityId(), model.getServiceSettings().modelId(), false);
+
+        this.input = Objects.requireNonNull(input);
+        this.query = Objects.requireNonNull(query);
+        this.returnDocuments = returnDocuments;
+        this.topN = topN;
+        taskSettings = model.getTaskSettings();
+    }
+
+    @Override
+    protected List<String> pathSegments() {
+        return List.of(CohereUtils.VERSION_1, RERANKING_PATH);
+    }
+
+    /**
+     * Writes the request body using the Mixedbread field names defined by {@link MixedbreadRerankRequestEntity}.
+     * Request-level {@code topN} and {@code returnDocuments} take precedence over the task settings; that precedence
+     * is implemented by the entity. The entity rejects a {@code null} model id or task settings.
+     */
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        var entity = new MixedbreadRerankRequestEntity(getModelId(), query, input, topN, returnDocuments, taskSettings);
+        return entity.toXContent(builder, params);
+    }
+
+    /**
+     * @return the request-level top N when present, otherwise the value from the task settings (which may be {@code null})
+     */
+    public Integer getTopN() {
+        return topN != null ? topN : taskSettings.getTopNDocumentsOnly();
+    }
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntity.java
new file mode 100644
index 0000000000000..7f036477bec7e
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntity.java
@@ -0,0 +1,63 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.mixedbread.request;
+
+import org.elasticsearch.core.Nullable;
+import org.elasticsearch.xcontent.ToXContentObject;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettings;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+
+import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.INPUT_FIELD;
+import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.MODEL_FIELD;
+import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.QUERY_FIELD;
+import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.RETURN_DOCUMENTS_FIELD;
+import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.TOP_K_FIELD;
+
+/**
+ * JSON body of a Mixedbread rerank request.
+ *
+ * @param model the model id; must not be {@code null}
+ * @param query the query to rank against; must not be {@code null}
+ * @param input the texts to rerank; must not be {@code null}
+ * @param topN request-level value, written in preference to the task setting when not {@code null}
+ * @param returnDocuments request-level value, written in preference to the task setting when not {@code null}
+ * @param taskSettings fallback source for {@code topN} and {@code returnDocuments}; must not be {@code null}
+ */
+public record MixedbreadRerankRequestEntity(
+    String model,
+    String query,
+    List<String> input,
+    @Nullable Integer topN,
+    @Nullable Boolean returnDocuments,
+    MixedbreadRerankTaskSettings taskSettings
+) implements ToXContentObject {
+
+    public MixedbreadRerankRequestEntity {
+        Objects.requireNonNull(model);
+        Objects.requireNonNull(query);
+        Objects.requireNonNull(input);
+        Objects.requireNonNull(taskSettings);
+    }
+
+    /**
+     * Writes model, query and input, then the top-k and return-documents fields when a value is available from the
+     * request or, failing that, from the task settings. A field with no value in either place is omitted.
+     */
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject();
+
+        builder.field(MODEL_FIELD, model);
+        builder.field(QUERY_FIELD, query);
+        builder.field(INPUT_FIELD, input);
+
+        // prefer the request-level value over the task settings
+        if (topN != null) {
+            builder.field(TOP_K_FIELD, topN);
+        } else if (taskSettings.getTopNDocumentsOnly() != null) {
+            builder.field(TOP_K_FIELD, taskSettings.getTopNDocumentsOnly());
+        }
+
+        // prefer the request-level value over the task settings
+        if (returnDocuments != null) {
+            builder.field(RETURN_DOCUMENTS_FIELD, returnDocuments);
+        } else if (taskSettings.getReturnDocuments() != null) {
+            builder.field(RETURN_DOCUMENTS_FIELD, taskSettings.getReturnDocuments());
+        }
+        builder.endObject();
+        return builder;
+    }
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java
new file mode 100644
index 0000000000000..a150fccc56943
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java
@@ -0,0 +1,91 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.mixedbread.rerank;
+
+import org.elasticsearch.core.Nullable;
+import org.elasticsearch.inference.ModelConfigurations;
+import org.elasticsearch.inference.ModelSecrets;
+import org.elasticsearch.inference.TaskType;
+import org.elasticsearch.xpack.inference.external.action.ExecutableAction;
+import org.elasticsearch.xpack.inference.services.ConfigurationParseContext;
+import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadModel;
+import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadService;
+import org.elasticsearch.xpack.inference.services.mixedbread.action.MixedbreadActionVisitor;
+import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings;
+
+import java.util.Map;
+
+/**
+ * A Mixedbread model for the {@link TaskType#RERANK} task type.
+ */
+public class MixedbreadRerankModel extends MixedbreadModel {
+    /**
+     * Returns a copy of {@code model} whose task settings are the result of
+     * {@link MixedbreadRerankTaskSettings#of} applied to the model's settings and the settings parsed from
+     * {@code taskSettings} (presumably the request values win where both are set).
+     *
+     * @param model the model to copy
+     * @param taskSettings task settings supplied with the request
+     * @return the copy carrying the merged task settings
+     */
+    public static MixedbreadRerankModel of(MixedbreadRerankModel model, Map<String, Object> taskSettings) {
+        var requestTaskSettings = MixedbreadRerankTaskSettings.fromMap(taskSettings);
+        return new MixedbreadRerankModel(model, MixedbreadRerankTaskSettings.of(model.getTaskSettings(), requestTaskSettings));
+    }
+
+    /**
+     * Creates a model by parsing the raw settings maps.
+     *
+     * @param modelId passed on as the first argument of {@link ModelConfigurations}
+     * @param serviceSettings raw service settings, parsed with {@code context}
+     * @param taskSettings raw task settings
+     * @param secrets raw secret settings; may be {@code null}
+     * @param context whether the settings come from a request or from persisted state
+     */
+    public MixedbreadRerankModel(
+        String modelId,
+        Map<String, Object> serviceSettings,
+        Map<String, Object> taskSettings,
+        @Nullable Map<String, Object> secrets,
+        ConfigurationParseContext context
+    ) {
+        this(
+            modelId,
+            MixedbreadRerankServiceSettings.fromMap(serviceSettings, context),
+            MixedbreadRerankTaskSettings.fromMap(taskSettings),
+            DefaultSecretSettings.fromMap(secrets)
+        );
+    }
+
+    /**
+     * Creates a model from already parsed settings. The secret settings double as the API key source and the service
+     * settings double as the rate limit settings of the parent class.
+     */
+    public MixedbreadRerankModel(
+        String modelId,
+        MixedbreadRerankServiceSettings serviceSettings,
+        MixedbreadRerankTaskSettings taskSettings,
+        @Nullable DefaultSecretSettings secretSettings
+    ) {
+        super(
+            new ModelConfigurations(modelId, TaskType.RERANK, MixedbreadService.NAME, serviceSettings, taskSettings),
+            new ModelSecrets(secretSettings),
+            secretSettings,
+            serviceSettings
+        );
+    }
+
+    private MixedbreadRerankModel(MixedbreadRerankModel model, MixedbreadRerankTaskSettings taskSettings) {
+        super(model, taskSettings);
+    }
+
+    public MixedbreadRerankModel(MixedbreadRerankModel model, MixedbreadRerankServiceSettings serviceSettings) {
+        super(model, serviceSettings);
+    }
+
+    @Override
+    public MixedbreadRerankServiceSettings getServiceSettings() {
+        return (MixedbreadRerankServiceSettings) super.getServiceSettings();
+    }
+
+    @Override
+    public MixedbreadRerankTaskSettings getTaskSettings() {
+        return (MixedbreadRerankTaskSettings) super.getTaskSettings();
+    }
+
+    @Override
+    public DefaultSecretSettings getSecretSettings() {
+        return (DefaultSecretSettings) super.getSecretSettings();
+    }
+
+    /**
+     * Accepts a visitor to create an executable action.
+     * @param visitor Interface for creating {@link ExecutableAction} instances for Mixedbread models.
+     * @param taskSettings Settings in the request to override the model's defaults
+     * @return the rerank action
+     */
+    // Left as a raw Map: a raw parameter overrides the abstract MixedbreadModel#accept declaration regardless of
+    // whether that declaration carries type arguments.
+    @Override
+    public ExecutableAction accept(MixedbreadActionVisitor visitor, Map taskSettings) {
+        return visitor.create(this, taskSettings);
+    }
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankRequestTaskSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankRequestTaskSettings.java
new file mode 100644
index 0000000000000..4f589abc6e368
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankRequestTaskSettings.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */ + +package org.elasticsearch.xpack.inference.services.mixedbread.rerank; + +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.ModelConfigurations; + +import java.util.Map; + +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalBoolean; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveInteger; +import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.RETURN_DOCUMENTS_FIELD; +import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.TOP_K_FIELD; + +public record MixedbreadRerankRequestTaskSettings(@Nullable Boolean returnDocuments, @Nullable Integer topN) { + + public static final MixedbreadRerankRequestTaskSettings EMPTY_SETTINGS = new MixedbreadRerankRequestTaskSettings(null, null); + + /** + * Extracts the task settings from a map. All settings are considered optional and the absence of a setting + * does not throw an error. 
+ * + * @param map the settings received from a request + * @return a {@link MixedbreadRerankRequestTaskSettings} + */ + public static MixedbreadRerankRequestTaskSettings fromMap(Map map) { + if (map.isEmpty()) { + return MixedbreadRerankRequestTaskSettings.EMPTY_SETTINGS; + } + + final var validationException = new ValidationException(); + + final var returnDocuments = extractOptionalBoolean(map, RETURN_DOCUMENTS_FIELD, validationException); + final var topN = extractOptionalPositiveInteger(map, TOP_K_FIELD, ModelConfigurations.TASK_SETTINGS, validationException); + + if (validationException.validationErrors().isEmpty() == false) { + throw validationException; + } + + return new MixedbreadRerankRequestTaskSettings(returnDocuments, topN); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankResponseHandler.java new file mode 100644 index 0000000000000..90393addf24d4 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankResponseHandler.java @@ -0,0 +1,24 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */
+
+package org.elasticsearch.xpack.inference.services.mixedbread.rerank;
+
+import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser;
+import org.elasticsearch.xpack.inference.services.mixedbread.response.MixedbreadErrorResponse;
+import org.elasticsearch.xpack.inference.services.openai.OpenAiResponseHandler;
+
+public class MixedbreadRerankResponseHandler extends OpenAiResponseHandler {
+    /**
+     * Constructs a new MixedbreadRerankResponseHandler with the specified request type and response parser.
+     * Errors are parsed with {@link MixedbreadErrorResponse#fromResponse}; NOTE(review): confirm the meaning of the trailing {@code false}.
+     * @param requestType the type of request this handler will process
+     * @param parseFunction the function to parse the response
+     */
+    public MixedbreadRerankResponseHandler(String requestType, ResponseParser parseFunction) {
+        super(requestType, parseFunction, MixedbreadErrorResponse::fromResponse, false);
+    }
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java
new file mode 100644
index 0000000000000..04782e9748881
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java
@@ -0,0 +1,152 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */ + +package org.elasticsearch.xpack.inference.services.mixedbread.rerank; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.ServiceSettings; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; + +import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadRateLimitServiceSettings; +import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadService; +import org.elasticsearch.xpack.inference.services.settings.FilteredXContentObject; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; + +import java.io.IOException; +import java.net.URI; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.services.ServiceFields.MODEL_ID; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalString; + +public class MixedbreadRerankServiceSettings extends FilteredXContentObject + implements + ServiceSettings, + MixedbreadRateLimitServiceSettings { + + public static final String NAME = "mixedbread_ai_rerank_service_settings"; + + /** + * Applied different rate limits based on the type of operation performed: + + * Operation Type Limit Burst Capacity Window + * Read 1,200 1,000 1-minute + * List 600 200 1-minute + * Write 360 120 1-minute + * Update 480 160 1-minute + * Delete 240 80 1-minute + * Rate Limiting. 
+ */ + private static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(240); + + public static MixedbreadRerankServiceSettings fromMap(Map map, ConfigurationParseContext context) { + ValidationException validationException = new ValidationException(); + + String model = extractOptionalString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException); + RateLimitSettings rateLimitSettings = RateLimitSettings.of( + map, + DEFAULT_RATE_LIMIT_SETTINGS, + validationException, + MixedbreadService.NAME, + context + ); + + if (validationException.validationErrors().isEmpty() == false) { + throw validationException; + } + + return new MixedbreadRerankServiceSettings(model, rateLimitSettings); + } + + private final String model; + + private final RateLimitSettings rateLimitSettings; + + public MixedbreadRerankServiceSettings(@Nullable String model, @Nullable RateLimitSettings rateLimitSettings) { + this.model = model; + this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS); + } + + public MixedbreadRerankServiceSettings(StreamInput in) throws IOException { + this.model = in.readOptionalString(); + this.rateLimitSettings = new RateLimitSettings(in); + } + + @Override + public String modelId() { + return model; + } + + @Override + public RateLimitSettings rateLimitSettings() { + return rateLimitSettings; + } + + @Override + public URI uri() { + return null; + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersion.minimumCompatible(); + } + + @Override + protected XContentBuilder toXContentFragmentOfExposedFields(XContentBuilder builder, Params params) throws IOException { + if (model != null) { + builder.field(MODEL_ID, model); + } + + rateLimitSettings.toXContent(builder, params); + + return builder; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, 
Params params) throws IOException { + builder.startObject(); + + toXContentFragmentOfExposedFields(builder, params); + + builder.endObject(); + + return builder; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeOptionalString(model); + rateLimitSettings.writeTo(out); + } + + @Override + public boolean equals(Object object) { + if (this == object) return true; + if (object == null || getClass() != object.getClass()) return false; + MixedbreadRerankServiceSettings that = (MixedbreadRerankServiceSettings) object; + return Objects.equals(model, that.modelId()) + && Objects.equals(rateLimitSettings, that.rateLimitSettings()); + } + + @Override + public int hashCode() { + return Objects.hash(model, rateLimitSettings); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java new file mode 100644 index 0000000000000..ac6436a65e4bb --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java @@ -0,0 +1,160 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.mixedbread.rerank; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.TaskSettings; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalBoolean; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveInteger; + +public class MixedbreadRerankTaskSettings implements TaskSettings { + public static final String NAME = "mixedbread_rerank_task_settings"; + public static final String RETURN_DOCUMENTS = "return_documents"; + public static final String TOP_N_DOCS_ONLY = "top_n"; + + static final MixedbreadRerankTaskSettings EMPTY_SETTINGS = new MixedbreadRerankTaskSettings( + null, null); + + public static MixedbreadRerankTaskSettings fromMap(Map map) { + ValidationException validationException = new ValidationException(); + + if (map == null || map.isEmpty()) { + return EMPTY_SETTINGS; + } + + Boolean returnDocuments = extractOptionalBoolean(map, RETURN_DOCUMENTS, validationException); + Integer topNDocumentsOnly = extractOptionalPositiveInteger( + map, + TOP_N_DOCS_ONLY, + ModelConfigurations.TASK_SETTINGS, + validationException + ); + + if (validationException.validationErrors().isEmpty() == false) { + throw validationException; + } + + return of(topNDocumentsOnly, returnDocuments); + } + + /** + * Creates a new {@link MixedbreadRerankTaskSettings} by preferring non-null fields from the request settings over the original settings. 
+ * + * @param originalSettings the settings stored as part of the inference entity configuration + * @param requestTaskSettings the settings passed in within the task_settings field of the request + * @return a constructed {@link MixedbreadRerankTaskSettings} + */ + public static MixedbreadRerankTaskSettings of( + MixedbreadRerankTaskSettings originalSettings, MixedbreadRerankTaskSettings requestTaskSettings) { + return new MixedbreadRerankTaskSettings( + requestTaskSettings.getTopNDocumentsOnly() != null + ? requestTaskSettings.getTopNDocumentsOnly() + : originalSettings.getTopNDocumentsOnly(), + requestTaskSettings.getReturnDocuments() != null + ? requestTaskSettings.getReturnDocuments() + : originalSettings.getReturnDocuments() + ); + } + + public static MixedbreadRerankTaskSettings of(Integer topNDocumentsOnly, Boolean returnDocuments) { + return new MixedbreadRerankTaskSettings(topNDocumentsOnly, returnDocuments); + } + + private final Integer topNDocumentsOnly; + private final Boolean returnDocuments; + + public MixedbreadRerankTaskSettings(StreamInput in) throws IOException { + this(in.readOptionalInt(), in.readOptionalBoolean()); + } + + public MixedbreadRerankTaskSettings( + @Nullable Integer topNDocumentsOnly, + @Nullable Boolean doReturnDocuments + ) { + this.topNDocumentsOnly = topNDocumentsOnly; + this.returnDocuments = doReturnDocuments; + } + + @Override + public boolean isEmpty() { + return topNDocumentsOnly == null && returnDocuments == null; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + if (topNDocumentsOnly != null) { + builder.field(TOP_N_DOCS_ONLY, topNDocumentsOnly); + } + if (returnDocuments != null) { + builder.field(RETURN_DOCUMENTS, returnDocuments); + } + builder.endObject(); + return builder; + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + 
return TransportVersion.minimumCompatible(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeOptionalInt(topNDocumentsOnly); + out.writeOptionalBoolean(returnDocuments); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + MixedbreadRerankTaskSettings that = (MixedbreadRerankTaskSettings) o; + return Objects.equals(returnDocuments, that.returnDocuments) + && Objects.equals(topNDocumentsOnly, that.topNDocumentsOnly); + } + + @Override + public int hashCode() { + return Objects.hash(returnDocuments, topNDocumentsOnly); + } + + public Boolean getDoesReturnDocuments() { + return returnDocuments; + } + + public Integer getTopNDocumentsOnly() { + return topNDocumentsOnly; + } + + public Boolean getReturnDocuments() { + return returnDocuments; + } + + @Override + public TaskSettings updatedTaskSettings(Map newSettings) { + MixedbreadRerankTaskSettings updatedSettings = MixedbreadRerankTaskSettings.fromMap(new HashMap<>(newSettings)); + return MixedbreadRerankTaskSettings.of(this, updatedSettings); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadErrorResponse.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadErrorResponse.java new file mode 100644 index 0000000000000..3e4d4d14cf7d8 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadErrorResponse.java @@ -0,0 +1,29 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.mixedbread.response; + +import org.elasticsearch.xpack.inference.external.http.HttpResult; +import org.elasticsearch.xpack.inference.external.http.retry.ErrorResponse; + +import java.nio.charset.StandardCharsets; + +public class MixedbreadErrorResponse extends ErrorResponse { + public MixedbreadErrorResponse(String message) { + super(message); + } + + public static ErrorResponse fromResponse(HttpResult response) { + try { + String errorMessage = new String(response.body(), StandardCharsets.UTF_8); + return new MixedbreadErrorResponse(errorMessage); + } catch (Exception e) { + // swallow the error + } + return ErrorResponse.UNDEFINED_ERROR; + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java new file mode 100644 index 0000000000000..be535e51750db --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java @@ -0,0 +1,142 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.mixedbread.response; + +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.XContentParserConfiguration; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.core.inference.results.RankedDocsResults; +import org.elasticsearch.xpack.inference.external.http.HttpResult; + +import java.io.IOException; +import java.util.List; + +import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; +import static org.elasticsearch.common.xcontent.XContentParserUtils.parseList; +import static org.elasticsearch.common.xcontent.XContentParserUtils.throwUnknownToken; +import static org.elasticsearch.xpack.inference.external.response.XContentUtils.moveToFirstToken; +import static org.elasticsearch.xpack.inference.external.response.XContentUtils.positionParserAtTokenAfterField; + +public class MixedbreadRerankResponseEntity { + /** + * Parses the Mixedbread rerank response. + + * For a request like: + *{ + * "model": "mixedbread-ai/mxbai-rerank-xsmall-v1", + * "query": "Who is the author of To Kill a Mockingbird?", + * "input": [ + * "To Kill a Mockingbird is a novel by Harper Lee", + * "The novel Moby-Dick was written by Herman Melville", + * "Harper Lee, an American novelist", + * "Jane Austen was an English novelist", + * "The Harry Potter series written by British author J.K. Rowling", + * "The Great Gatsby, a novel written by American author F. Scott Fitzgerald" + * ], + * "top_k": 3, + * "return_input": false + * } + *

+ * The response will look like (without whitespace): + *{ + * "usage": { + * "prompt_tokens": 162, + * "total_tokens": 162, + * "completion_tokens": 0 + * }, + * "model": "mixedbread-ai/mxbai-rerank-xsmall-v1", + * "data": [ + * { + * "index": 0, + * "score": 0.98291015625, + * "input": null, + * "object": "rank_result" + * }, + * { + * "index": 2, + * "score": 0.61962890625, + * "input": null, + * "object": "rank_result" + * }, + * { + * "index": 3, + * "score": 0.3642578125, + * "input": null, + * "object": "rank_result" + * } + * ], + * "object": "list", + * "top_k": 3, + * "return_input": false + * } + + * Parses the response from a Mixedbread rerank request and returns the results. + + * @param response the http response from Mixedbread + * @return the parsed response + * @throws IOException if there is an error parsing the response + */ + public static InferenceServiceResults fromResponse(HttpResult response) throws IOException { + var parserConfig = XContentParserConfiguration.EMPTY.withDeprecationHandler(LoggingDeprecationHandler.INSTANCE); + + try (XContentParser jsonParser = XContentFactory.xContent(XContentType.JSON).createParser(parserConfig, response.body())) { + moveToFirstToken(jsonParser); + moveToFirstToken(jsonParser); + return new RankedDocsResults(doParse(jsonParser)); + } + } + + private static List doParse(XContentParser parser) throws IOException { + XContentParser.Token token = parser.currentToken(); + ensureExpectedToken(XContentParser.Token.START_OBJECT, token, parser); + + positionParserAtTokenAfterField(parser, "data", "FAILED_TO_FIND_FIELD_TEMPLATE"); // TODO error message + + token = parser.currentToken(); + if (token == XContentParser.Token.START_ARRAY) { + return parseList(parser, (listParser, index) -> { + var parsedRankedDoc = RankedDocEntry.parse(parser); + return new RankedDocsResults.RankedDoc(parsedRankedDoc.index, parsedRankedDoc.score, parsedRankedDoc.text); + }); + } else { + throwUnknownToken(token, parser); + } + + // 
This should never be reached. The above code should either return successfully or hit the throwUnknownToken + // or throw a parsing exception + throw new IllegalStateException("Reached an invalid state while parsing the Cohere response"); + } + + private record RankedDocEntry(Integer index, Float score, @Nullable String text) { + + private static final ParseField TEXT = new ParseField("input"); + private static final ParseField SCORE = new ParseField("score"); + private static final ParseField INDEX = new ParseField("index"); + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "mixedbread_rerank_response", + true, + args -> new RankedDocEntry((int) args[0], (float) args[1], (String) args[2]) + ); + + static { + PARSER.declareInt(ConstructingObjectParser.constructorArg(), INDEX); + PARSER.declareFloat(ConstructingObjectParser.constructorArg(), SCORE); + PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), TEXT); + } + + public static RankedDocEntry parse(XContentParser parser) { + return PARSER.apply(parser, null); + } + } +} From 0a184ca7d314e514920b5983477b7b229e97906c Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Mon, 12 Jan 2026 08:20:10 +0100 Subject: [PATCH 02/48] Add Mixedbread AI Rerank support tests --- .../MixedbreadRerankRequestEntityTests.java | 161 ++++++++++++++++++ .../request/MixedbreadRerankRequestTests.java | 82 +++++++++ .../rerank/MixedbreadRerankModelTests.java | 22 +++ .../MixedbreadRerankServiceSettingsTests.java | 95 +++++++++++ .../MixedbreadRerankTaskSettingsTests.java | 139 +++++++++++++++ .../MixedbreadRerankResponseEntityTests.java | 127 ++++++++++++++ 6 files changed, 626 insertions(+) create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntityTests.java create mode 100644 
x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettingsTests.java create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntityTests.java diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntityTests.java new file mode 100644 index 0000000000000..430aca712d801 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntityTests.java @@ -0,0 +1,161 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */
+
+package org.elasticsearch.xpack.inference.services.mixedbread.request;
+
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentFactory;
+import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettings;
+
+import java.io.IOException;
+import java.util.List;
+
+import static org.elasticsearch.xpack.inference.MatchersUtils.equalToIgnoringWhitespaceInJsonString;
+
+public class MixedbreadRerankRequestEntityTests extends ESTestCase {
+    // Each test serializes one MixedbreadRerankRequestEntity and compares the JSON, ignoring whitespace.
+    public static final String MODEL = "model";
+    public static final String QUERY = "query";
+
+    public void testXContent_SingleRequest_WritesAllFieldsIfDefined() throws IOException {
+        var entity = new MixedbreadRerankRequestEntity(
+            MODEL,
+            QUERY,
+            List.of("abc"),
+            12,
+            Boolean.TRUE,
+            new MixedbreadRerankTaskSettings(8, Boolean.FALSE)
+        );
+        // The explicit values (12, true) are expected in the output, not the task settings values (8, false).
+        assertThat(getXContentResult(entity), equalToIgnoringWhitespaceInJsonString("""
+            {
+                "model": "model",
+                "query": "query",
+                "input": [
+                    "abc"
+                ],
+                "top_k": 12,
+                "return_documents": true
+            }
+            """));
+    }
+
+    public void testXContent_SingleRequest_WritesMinimalFields() throws IOException {
+        var entity = new MixedbreadRerankRequestEntity(
+            MODEL,
+            QUERY,
+            List.of("abc"),
+            null,
+            null,
+            new MixedbreadRerankTaskSettings(null, null));
+        // Neither the entity nor the task settings define the optional values, so only three fields are expected.
+        assertThat(getXContentResult(entity), equalToIgnoringWhitespaceInJsonString("""
+            {
+                "model": "model",
+                "query": "query",
+                "input": [
+                    "abc"
+                ]
+            }
+            """));
+    }
+
+    public void testXContent_MultipleRequests_WritesAllFieldsIfDefined() throws IOException {
+        var entity = new MixedbreadRerankRequestEntity(
+            MODEL,
+            QUERY,
+            List.of("abc", "def"),
+            12,
+            Boolean.FALSE,
+            new MixedbreadRerankTaskSettings(8, Boolean.TRUE)
+        );
+        // Same precedence check as the single-input case, with two inputs and the boolean values swapped.
+        assertThat(getXContentResult(entity), equalToIgnoringWhitespaceInJsonString("""
+            {
+                "model": "model",
+                "query": "query",
+                "input": [
+                    "abc",
+                    "def"
+                ],
+                "top_k": 12,
+                "return_documents": false
+            }
+            """));
+    }
+
+    public void testXContent_MultipleRequests_WritesMinimalFields() throws IOException {
+        var entity = new MixedbreadRerankRequestEntity(
+            MODEL, QUERY, List.of("abc", "def"), null, null, new MixedbreadRerankTaskSettings(null, null));
+        // Two inputs, no optional values anywhere: only model, query and input are expected.
+        assertThat(getXContentResult(entity), equalToIgnoringWhitespaceInJsonString("""
+            {
+                "model": "model",
+                "query": "query",
+                "input": [
+                    "abc",
+                    "def"
+                ]
+            }
+            """));
+    }
+
+    public void testXContent_SingleRequest_UsesTaskSettingsTopNIfRootIsNotDefined() throws IOException {
+        var entity = new MixedbreadRerankRequestEntity(
+            MODEL,
+            QUERY,
+            List.of("abc"),
+            null,
+            null,
+            new MixedbreadRerankTaskSettings(8, Boolean.FALSE)
+        );
+        // With null explicit values, the task settings values (8, false) are expected in the output.
+        assertThat(getXContentResult(entity), equalToIgnoringWhitespaceInJsonString("""
+            {
+                "model": "model",
+                "query": "query",
+                "input": [
+                    "abc"
+                ],
+                "top_k": 8,
+                "return_documents": false
+            }
+            """));
+    }
+
+    public void testXContent_SingleRequest_UsesTaskSettingsReturnDocumentsIfRootIsNotDefined() throws IOException {
+        var entity = new MixedbreadRerankRequestEntity(
+            MODEL,
+            QUERY,
+            List.of("abc"),
+            null,
+            null,
+            new MixedbreadRerankTaskSettings(8, Boolean.TRUE)
+        );
+        // Differs from the previous test only in the task settings boolean (true instead of false).
+        assertThat(getXContentResult(entity), equalToIgnoringWhitespaceInJsonString("""
+            {
+                "model": "model",
+                "query": "query",
+                "input": [
+                    "abc"
+                ],
+                "top_k": 8,
+                "return_documents": true
+            }
+            """));
+    }
+
+    private String getXContentResult(MixedbreadRerankRequestEntity entity) throws IOException {
+        XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON);
+        entity.toXContent(builder, null);
+        return Strings.toString(builder);
+    }
+}
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java new file mode 100644 index 0000000000000..cfa3e7efd5e7b --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java @@ -0,0 +1,82 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.mixedbread.request; + +import org.apache.http.HttpHeaders; +import org.apache.http.client.methods.HttpPost; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModelTests; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.xpack.inference.external.http.Utils.entityAsMap; +import static org.hamcrest.Matchers.aMapWithSize; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.sameInstance; + +public class MixedbreadRerankRequestTests extends ESTestCase { + + private static final String API_KEY = "api_key"; + public static final String INPUT = "input"; + public static final String MODEL = "model"; + public static final String QUERY = "query"; + public static final int TOP_N = 1; + + public void testCreateRequest_WithMinimalFieldsSet() throws IOException { + var request = createRequest(QUERY, INPUT, MODEL, null, null); + var requestMap = getEntityAsMap(request); + assertThat(requestMap, aMapWithSize(3)); + assertThat(requestMap.get("documents"), is(List.of(INPUT))); + assertThat(requestMap.get("query"), 
is(QUERY)); + assertThat(requestMap.get("model"), is(MODEL)); + } + + public void testCreateRequest_WithAllFieldsSet() throws IOException { + var request = createRequest(QUERY, INPUT, MODEL, TOP_N, Boolean.FALSE); + Map requestMap = getEntityAsMap(request); + + assertThat(requestMap, aMapWithSize(5)); + assertThat(requestMap.get("documents"), is(List.of(INPUT))); + assertThat(requestMap.get("query"), is(QUERY)); + assertThat(requestMap.get("top_n"), is(TOP_N)); + assertThat(requestMap.get("return_documents"), is(Boolean.FALSE)); + assertThat(requestMap.get("model"), is(MODEL)); + } + + public void testTruncate_DoesNotTruncate() { + var request = createRequest(QUERY, INPUT, "null", null, null); + var truncatedRequest = request.truncate(); + + assertThat(truncatedRequest, sameInstance(request)); + } + + private static MixedbreadRerankRequest createRequest( + String query, + String input, + @Nullable String modelId, + @Nullable Integer topN, + @Nullable Boolean returnDocuments + ) { + var rerankModel = MixedbreadRerankModelTests.createModel(modelId, API_KEY); + return new MixedbreadRerankRequest(query, List.of(input), returnDocuments, topN, rerankModel); + } + + private Map getEntityAsMap(MixedbreadRerankRequest request) throws IOException { + var httpRequest = request.createHttpRequest(); + assertThat(httpRequest.httpRequestBase(), instanceOf(HttpPost.class)); + var httpPost = (HttpPost) httpRequest.httpRequestBase(); + assertThat(httpPost.getLastHeader(HttpHeaders.CONTENT_TYPE).getValue(), is(XContentType.JSON.mediaType())); + assertThat(httpPost.getLastHeader(HttpHeaders.AUTHORIZATION).getValue(), is("Bearer " + API_KEY)); + return entityAsMap(httpPost.getEntity().getContent()); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java new 
file mode 100644 index 0000000000000..de90a46b457be --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java @@ -0,0 +1,22 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.mixedbread.rerank; + +import org.elasticsearch.common.settings.SecureString; +import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings; + +public class MixedbreadRerankModelTests { + public static MixedbreadRerankModel createModel(String model, String apiKey) { + return new MixedbreadRerankModel( + model, + new MixedbreadRerankServiceSettings(model, null), + new MixedbreadRerankTaskSettings(null, null), + new DefaultSecretSettings(new SecureString(apiKey.toCharArray())) + ); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java new file mode 100644 index 0000000000000..e3bed30e2115b --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java @@ -0,0 +1,95 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.mixedbread.rerank; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.test.AbstractWireSerializingTestCase; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.inference.services.ServiceFields; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettingsTests; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import static org.elasticsearch.xpack.inference.MatchersUtils.equalToIgnoringWhitespaceInJsonString; + +public class MixedbreadRerankServiceSettingsTests extends AbstractWireSerializingTestCase { + + public static MixedbreadRerankServiceSettings createRandom() { + return createRandom(randomFrom(new RateLimitSettings[] { null, RateLimitSettingsTests.createRandom() })); + } + + public static MixedbreadRerankServiceSettings createRandom(@Nullable RateLimitSettings rateLimitSettings) { + return new MixedbreadRerankServiceSettings( + randomAlphaOfLengthOrNull(10), + rateLimitSettings + ); + } + + public void testToXContent_WritesAllValues() throws IOException { + var model = "model"; + + var serviceSettings = new MixedbreadRerankServiceSettings(model, null); + + XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); + serviceSettings.toXContent(builder, null); + String xContentResult = Strings.toString(builder); + + assertThat(xContentResult, equalToIgnoringWhitespaceInJsonString(""" + { + "model_id":"model", + "rate_limit": { + "requests_per_minute": 240 + } + } + """)); + } + + @Override + protected Writeable.Reader instanceReader() { + return MixedbreadRerankServiceSettings::new; + } + + @Override + protected 
MixedbreadRerankServiceSettings createTestInstance() { + return createRandom(); + } + + @Override + protected MixedbreadRerankServiceSettings mutateInstance(MixedbreadRerankServiceSettings instance) throws IOException { + var modelId = instance.modelId(); + var rateLimitSettings = instance.rateLimitSettings(); + switch (randomInt(1)) { + case 0 -> modelId = randomValueOtherThan(modelId, () -> randomAlphaOfLengthOrNull(10)); + case 1 -> rateLimitSettings = randomValueOtherThan(rateLimitSettings, RateLimitSettingsTests::createRandom); + default -> throw new AssertionError("Illegal randomisation branch"); + } + + return new MixedbreadRerankServiceSettings(modelId, rateLimitSettings); + } + + public static Map getServiceSettingsMap(@Nullable String url, @Nullable String model) { + var map = new HashMap(); + + if (url != null) { + map.put(ServiceFields.URL, url); + } + + if (model != null) { + map.put(ServiceFields.MODEL_ID, model); + } + + return map; + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettingsTests.java new file mode 100644 index 0000000000000..3ffeb79ec5f9b --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettingsTests.java @@ -0,0 +1,139 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.mixedbread.rerank; + +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.test.AbstractWireSerializingTestCase; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import static org.hamcrest.Matchers.containsString; + +public class MixedbreadRerankTaskSettingsTests extends AbstractWireSerializingTestCase { + + public static MixedbreadRerankTaskSettings createRandom() { + var returnDocuments = randomOptionalBoolean(); + var topNDocsOnly = randomBoolean() ? randomIntBetween(1, 10) : null; + + return new MixedbreadRerankTaskSettings(topNDocsOnly, returnDocuments); + } + + public void testFromMap_WithValidValues_ReturnsSettings() { + Map taskMap = Map.of( + MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, + true, + MixedbreadRerankTaskSettings.TOP_N_DOCS_ONLY, + 5 + ); + var settings = MixedbreadRerankTaskSettings.fromMap(new HashMap<>(taskMap)); + assertTrue(settings.getReturnDocuments()); + assertEquals(5, settings.getTopNDocumentsOnly().intValue()); + } + + public void testFromMap_WithNullValues_ReturnsSettingsWithNulls() { + var settings = MixedbreadRerankTaskSettings.fromMap(Map.of()); + assertNull(settings.getReturnDocuments()); + assertNull(settings.getTopNDocumentsOnly()); + } + + public void testFromMap_WithInvalidReturnDocuments_ThrowsValidationException() { + Map taskMap = Map.of( + MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, + "invalid", + MixedbreadRerankTaskSettings.TOP_N_DOCS_ONLY, + 5 + ); + var thrownException = expectThrows(ValidationException.class, () -> MixedbreadRerankTaskSettings.fromMap(new HashMap<>(taskMap))); + assertThat(thrownException.getMessage(), containsString("field [return_documents] is not of the expected type")); + } + + public void testFromMap_WithInvalidTopNDocsOnly_ThrowsValidationException() { + Map taskMap = Map.of( + 
MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, + true, + MixedbreadRerankTaskSettings.TOP_N_DOCS_ONLY, + "invalid" + ); + var thrownException = expectThrows(ValidationException.class, () -> MixedbreadRerankTaskSettings.fromMap(new HashMap<>(taskMap))); + assertThat(thrownException.getMessage(), containsString("field [top_n] is not of the expected type")); + } + + public void testUpdatedTaskSettings_WithEmptyMap_ReturnsSameSettings() { + var initialSettings = new MixedbreadRerankTaskSettings(5, true); + MixedbreadRerankTaskSettings updatedSettings = (MixedbreadRerankTaskSettings) initialSettings.updatedTaskSettings(Map.of()); + assertEquals(initialSettings, updatedSettings); + } + + public void testUpdatedTaskSettings_WithNewReturnDocuments_ReturnsUpdatedSettings() { + var initialSettings = new MixedbreadRerankTaskSettings(5, true); + Map newSettings = Map.of(MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, false); + MixedbreadRerankTaskSettings updatedSettings = (MixedbreadRerankTaskSettings) initialSettings.updatedTaskSettings(newSettings); + assertFalse(updatedSettings.getReturnDocuments()); + assertEquals(initialSettings.getTopNDocumentsOnly(), updatedSettings.getTopNDocumentsOnly()); + } + + public void testUpdatedTaskSettings_WithNewTopNDocsOnly_ReturnsUpdatedSettings() { + var initialSettings = new MixedbreadRerankTaskSettings(5, true); + Map newSettings = Map.of(MixedbreadRerankTaskSettings.TOP_N_DOCS_ONLY, 7); + MixedbreadRerankTaskSettings updatedSettings = (MixedbreadRerankTaskSettings) initialSettings.updatedTaskSettings(newSettings); + assertEquals(7, updatedSettings.getTopNDocumentsOnly().intValue()); + assertEquals(initialSettings.getReturnDocuments(), updatedSettings.getReturnDocuments()); + } + + public void testUpdatedTaskSettings_WithMultipleNewValues_ReturnsUpdatedSettings() { + var initialSettings = new MixedbreadRerankTaskSettings(5, true); + Map newSettings = Map.of( + MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, + false, +
MixedbreadRerankTaskSettings.TOP_N_DOCS_ONLY, + 7 + ); + MixedbreadRerankTaskSettings updatedSettings = (MixedbreadRerankTaskSettings) initialSettings.updatedTaskSettings(newSettings); + assertFalse(updatedSettings.getReturnDocuments()); + assertEquals(7, updatedSettings.getTopNDocumentsOnly().intValue()); + } + + @Override + protected Writeable.Reader instanceReader() { + return MixedbreadRerankTaskSettings::new; + } + + @Override + protected MixedbreadRerankTaskSettings createTestInstance() { + return createRandom(); + } + + @Override + protected MixedbreadRerankTaskSettings mutateInstance(MixedbreadRerankTaskSettings instance) throws IOException { + var topNDocsOnly = instance.getTopNDocumentsOnly(); + var returnDocuments = instance.getReturnDocuments(); + switch (randomInt(1)) { + case 0 -> topNDocsOnly = randomValueOtherThan(topNDocsOnly, () -> randomFrom(randomIntBetween(1, 10), null)); + case 1 -> returnDocuments = returnDocuments == null ? randomBoolean() : returnDocuments == false; + } + return new MixedbreadRerankTaskSettings(topNDocsOnly, returnDocuments); + } + + public static Map getTaskSettingsMap(@Nullable Integer topNDocumentsOnly, Boolean returnDocuments) { + var map = new HashMap(); + + if (topNDocumentsOnly != null) { + map.put(MixedbreadRerankTaskSettings.TOP_N_DOCS_ONLY, topNDocumentsOnly.toString()); + } + + if (returnDocuments != null) { + map.put(MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, returnDocuments.toString()); + } + + return map; + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntityTests.java new file mode 100644 index 0000000000000..6fc4fdfc33786 --- /dev/null +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntityTests.java @@ -0,0 +1,127 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.mixedbread.response; + +import org.apache.http.HttpResponse; +import org.elasticsearch.common.Strings; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.core.inference.results.RankedDocsResults; +import org.elasticsearch.xpack.inference.external.http.HttpResult; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.List; + +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; +import static org.mockito.Mockito.mock; + +public class MixedbreadRerankResponseEntityTests extends ESTestCase { + + private static final String HARPER_LEE = "Harper Lee, an American novelist"; + private static final String NOVEL_BY_HARPER_LEE = "To Kill a Mockingbird is a novel by Harper Lee"; + private static final String JANE_AUSTEN = "Jane Austen was an English novelist"; + + private static final List RESPONSE_LITERAL_DOCS = List.of( + new RankedDocsResults.RankedDoc(0, 0.98291015625F, null), + new RankedDocsResults.RankedDoc(2, 0.61962890625F, null), + new RankedDocsResults.RankedDoc(3, 0.3642578125F, null) + ); + + private static final List RESPONSE_LITERAL_DOCS_WITH_TEXT = List.of( + new RankedDocsResults.RankedDoc(0, 0.98291015625F, HARPER_LEE), + new RankedDocsResults.RankedDoc(2, 0.61962890625F, NOVEL_BY_HARPER_LEE), + new RankedDocsResults.RankedDoc(3, 0.3642578125F, JANE_AUSTEN) + ); + + public void testResponseLiteral() throws IOException { + + 
InferenceServiceResults parsedResults = MixedbreadRerankResponseEntity.fromResponse( + new HttpResult(mock(HttpResponse.class), RESPONSE_LITERAL.getBytes(StandardCharsets.UTF_8)) + ); + + assertThat(parsedResults, instanceOf(RankedDocsResults.class)); + for (int i = 0; i < RESPONSE_LITERAL_DOCS.size(); i++) { + assertEquals(RESPONSE_LITERAL_DOCS.get(i).index(), ((RankedDocsResults) parsedResults).getRankedDocs().get(i).index()); + } + } + + public void testResponseLiteralWithDocumentsAsString() throws IOException { + InferenceServiceResults parsedResults = MixedbreadRerankResponseEntity.fromResponse( + new HttpResult(mock(HttpResponse.class), RESPONSE_LITERAL_WITH_INPUT.getBytes(StandardCharsets.UTF_8)) + ); + assertThat(parsedResults, instanceOf(RankedDocsResults.class)); + assertThat(((RankedDocsResults) parsedResults).getRankedDocs(), is(RESPONSE_LITERAL_DOCS_WITH_TEXT)); + } + + private static final String RESPONSE_LITERAL = """ + { + "usage": { + "prompt_tokens": 162, + "total_tokens": 162, + "completion_tokens": 0 + }, + "model": "mixedbread-ai/mxbai-rerank-xsmall-v1", + "data": [ + { + "index": 0, + "score": 0.98291015625, + "object": "rank_result" + }, + { + "index": 2, + "score": 0.61962890625, + "object": "rank_result" + }, + { + "index": 3, + "score": 0.3642578125, + "object": "rank_result" + } + ], + "object": "list", + "top_k": 3, + "return_input": false + } + """; + + private static final String RESPONSE_LITERAL_WITH_INPUT = Strings.format(""" + { + "usage": { + "prompt_tokens": 162, + "total_tokens": 162, + "completion_tokens": 0 + }, + "model": "mixedbread-ai/mxbai-rerank-xsmall-v1", + "data": [ + { + "index": 0, + "score": 0.98291015625, + "input": "%s", + "object": "rank_result" + }, + { + "index": 2, + "score": 0.61962890625, + "input": "%s", + "object": "rank_result" + }, + { + "index": 3, + "score": 0.3642578125, + "input": "%s", + "object": "rank_result" + } + ], + "object": "list", + "top_k": 3, + "return_input":
false + } + """, HARPER_LEE, NOVEL_BY_HARPER_LEE, JANE_AUSTEN); +} From 6133d64dfed0feaf921113a21f89b099f0855ade Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Tue, 13 Jan 2026 15:54:27 +0100 Subject: [PATCH 03/48] Apply spotless --- .../mixedbread/MixedbreadService.java | 22 ++-- .../mixedbread/request/MixedbreadRequest.java | 2 - .../MixedbreadRerankRequestEntity.java | 2 +- .../MixedbreadRerankServiceSettings.java | 9 +- .../rerank/MixedbreadRerankTaskSettings.java | 15 +-- .../MixedbreadRerankRequestEntityTests.java | 11 +- .../MixedbreadRerankServiceSettingsTests.java | 5 +- .../MixedbreadRerankResponseEntityTests.java | 118 +++++++++--------- 8 files changed, 91 insertions(+), 93 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java index a2ef58581efdd..ee2e7b0d16065 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java @@ -115,9 +115,9 @@ public void parseRequestConfig( ConfigurationParseContext.REQUEST ); -// throwIfNotEmptyMap(config, NAME); -// throwIfNotEmptyMap(serviceSettingsMap, NAME); -// throwIfNotEmptyMap(taskSettingsMap, NAME); + // throwIfNotEmptyMap(config, NAME); + // throwIfNotEmptyMap(serviceSettingsMap, NAME); + // throwIfNotEmptyMap(taskSettingsMap, NAME); parsedModelListener.onResponse(model); } catch (Exception e) { @@ -154,8 +154,7 @@ private static MixedbreadModel createModel( ConfigurationParseContext context ) { return switch (taskType) { - case RERANK -> new MixedbreadRerankModel( - inferenceEntityId, serviceSettings, taskSettings, secretSettings, context); + case RERANK -> new MixedbreadRerankModel(inferenceEntityId, serviceSettings, 
taskSettings, secretSettings, context); default -> throw createInvalidTaskTypeException(inferenceEntityId, NAME, taskType, context); }; } @@ -225,8 +224,13 @@ protected void doUnifiedCompletionInfer( @Override protected void doChunkedInfer( - Model model, List inputs, Map taskSettings, - InputType inputType, TimeValue timeout, ActionListener> listener) { + Model model, + List inputs, + Map taskSettings, + InputType inputType, + TimeValue timeout, + ActionListener> listener + ) { } @@ -285,8 +289,8 @@ public static InferenceServiceConfiguration get() { configurationMap.put( MODEL_ID, new SettingsConfiguration.Builder(supportedTaskTypes).setDescription( - "The name of the model to use for the inference task." - ) + "The name of the model to use for the inference task." + ) .setLabel("Model ID") .setRequired(true) .setSensitive(false) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRequest.java index f2a9bc70e0402..d43d1d4680e33 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRequest.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRequest.java @@ -19,7 +19,6 @@ import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.inference.external.request.HttpRequest; import org.elasticsearch.xpack.inference.external.request.Request; -import org.elasticsearch.xpack.inference.services.cohere.CohereAccount; import org.elasticsearch.xpack.inference.services.cohere.CohereService; import org.elasticsearch.xpack.inference.services.cohere.request.CohereUtils; import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadAccount; @@ -113,4 +112,3 @@ public boolean[] getTruncationInfo() { return null; } } - diff 
--git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntity.java index 7f036477bec7e..f1a856255ff2d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntity.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntity.java @@ -17,10 +17,10 @@ import java.util.Objects; import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.INPUT_FIELD; +import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.MODEL_FIELD; import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.QUERY_FIELD; import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.RETURN_DOCUMENTS_FIELD; import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.TOP_K_FIELD; -import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.MODEL_FIELD; public record MixedbreadRerankRequestEntity( String model, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java index 04782e9748881..3265fc87f17b6 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java @@ -16,7 +16,6 @@ import 
org.elasticsearch.inference.ServiceSettings; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; - import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadRateLimitServiceSettings; import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadService; import org.elasticsearch.xpack.inference.services.settings.FilteredXContentObject; @@ -30,10 +29,7 @@ import static org.elasticsearch.xpack.inference.services.ServiceFields.MODEL_ID; import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalString; -public class MixedbreadRerankServiceSettings extends FilteredXContentObject - implements - ServiceSettings, - MixedbreadRateLimitServiceSettings { +public class MixedbreadRerankServiceSettings extends FilteredXContentObject implements ServiceSettings, MixedbreadRateLimitServiceSettings { public static final String NAME = "mixedbread_ai_rerank_service_settings"; @@ -141,8 +137,7 @@ public boolean equals(Object object) { if (this == object) return true; if (object == null || getClass() != object.getClass()) return false; MixedbreadRerankServiceSettings that = (MixedbreadRerankServiceSettings) object; - return Objects.equals(model, that.modelId()) - && Objects.equals(rateLimitSettings, that.rateLimitSettings()); + return Objects.equals(model, that.modelId()) && Objects.equals(rateLimitSettings, that.rateLimitSettings()); } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java index ac6436a65e4bb..c95515cc05f87 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java @@ -29,8 +29,7 @@ public class MixedbreadRerankTaskSettings implements TaskSettings { public static final String RETURN_DOCUMENTS = "return_documents"; public static final String TOP_N_DOCS_ONLY = "top_n"; - static final MixedbreadRerankTaskSettings EMPTY_SETTINGS = new MixedbreadRerankTaskSettings( - null, null); + static final MixedbreadRerankTaskSettings EMPTY_SETTINGS = new MixedbreadRerankTaskSettings(null, null); public static MixedbreadRerankTaskSettings fromMap(Map map) { ValidationException validationException = new ValidationException(); @@ -62,7 +61,9 @@ public static MixedbreadRerankTaskSettings fromMap(Map map) { * @return a constructed {@link MixedbreadRerankTaskSettings} */ public static MixedbreadRerankTaskSettings of( - MixedbreadRerankTaskSettings originalSettings, MixedbreadRerankTaskSettings requestTaskSettings) { + MixedbreadRerankTaskSettings originalSettings, + MixedbreadRerankTaskSettings requestTaskSettings + ) { return new MixedbreadRerankTaskSettings( requestTaskSettings.getTopNDocumentsOnly() != null ? 
requestTaskSettings.getTopNDocumentsOnly() @@ -84,10 +85,7 @@ public MixedbreadRerankTaskSettings(StreamInput in) throws IOException { this(in.readOptionalInt(), in.readOptionalBoolean()); } - public MixedbreadRerankTaskSettings( - @Nullable Integer topNDocumentsOnly, - @Nullable Boolean doReturnDocuments - ) { + public MixedbreadRerankTaskSettings(@Nullable Integer topNDocumentsOnly, @Nullable Boolean doReturnDocuments) { this.topNDocumentsOnly = topNDocumentsOnly; this.returnDocuments = doReturnDocuments; } @@ -131,8 +129,7 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; MixedbreadRerankTaskSettings that = (MixedbreadRerankTaskSettings) o; - return Objects.equals(returnDocuments, that.returnDocuments) - && Objects.equals(topNDocumentsOnly, that.topNDocumentsOnly); + return Objects.equals(returnDocuments, that.returnDocuments) && Objects.equals(topNDocumentsOnly, that.topNDocumentsOnly); } @Override diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntityTests.java index 430aca712d801..e7c8b0b04574f 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntityTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntityTests.java @@ -54,7 +54,8 @@ public void testXContent_SingleRequest_WritesMinimalFields() throws IOException List.of("abc"), null, null, - new MixedbreadRerankTaskSettings(null, null)); + new MixedbreadRerankTaskSettings(null, null) + ); assertThat(getXContentResult(entity), equalToIgnoringWhitespaceInJsonString(""" { @@ -93,7 +94,13 @@ public void 
testXContent_MultipleRequests_WritesAllFieldsIfDefined() throws IOEx public void testXContent_MultipleRequests_WritesMinimalFields() throws IOException { var entity = new MixedbreadRerankRequestEntity( - MODEL, QUERY, List.of("abc", "def"), null, null, new MixedbreadRerankTaskSettings(null, null)); + MODEL, + QUERY, + List.of("abc", "def"), + null, + null, + new MixedbreadRerankTaskSettings(null, null) + ); assertThat(getXContentResult(entity), equalToIgnoringWhitespaceInJsonString(""" { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java index e3bed30e2115b..3254952a2d314 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java @@ -31,10 +31,7 @@ public static MixedbreadRerankServiceSettings createRandom() { } public static MixedbreadRerankServiceSettings createRandom(@Nullable RateLimitSettings rateLimitSettings) { - return new MixedbreadRerankServiceSettings( - randomAlphaOfLengthOrNull(10), - rateLimitSettings - ); + return new MixedbreadRerankServiceSettings(randomAlphaOfLengthOrNull(10), rateLimitSettings); } public void testToXContent_WritesAllValues() throws IOException { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntityTests.java index 6fc4fdfc33786..7ba93ed95852f 100644 --- 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntityTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntityTests.java @@ -61,67 +61,67 @@ public void testResponseLiteralWithDocumentsAsString() throws IOException { } private static final String RESPONSE_LITERAL = """ - { - "usage": { - "prompt_tokens": 162, - "total_tokens": 162, - "completion_tokens": 0 + { + "usage": { + "prompt_tokens": 162, + "total_tokens": 162, + "completion_tokens": 0 + }, + "model": "mixedbread-ai/mxbai-rerank-xsmall-v1", + "data": [ + { + "index": 0, + "score": 0.98291015625, + "object": "rank_result" }, - "model": "mixedbread-ai/mxbai-rerank-xsmall-v1", - "data": [ - { - "index": 0, - "score": 0.98291015625, - "object": "rank_result" - }, - { - "index": 2, - "score": 0.61962890625, - "object": "rank_result" - }, - { - "index": 3, - "score": 0.3642578125, - "object": "rank_result" - } - ], - "object": "list", - "top_k": 3, - "return_input": false - } - """; + { + "index": 2, + "score": 0.61962890625, + "object": "rank_result" + }, + { + "index": 3, + "score": 0.3642578125, + "object": "rank_result" + } + ], + "object": "list", + "top_k": 3, + "return_input": false + } + """; private static final String RESPONSE_LITERAL_WITH_INPUT = Strings.format(""" - { - "usage": { - "prompt_tokens": 162, - "total_tokens": 162, - "completion_tokens": 0 + { + "usage": { + "prompt_tokens": 162, + "total_tokens": 162, + "completion_tokens": 0 + }, + "model": "mixedbread-ai/mxbai-rerank-xsmall-v1", + "data": [ + { + "index": 0, + "score": 0.98291015625, + "input": "%s", + "object": "rank_result" }, - "model": "mixedbread-ai/mxbai-rerank-xsmall-v1", - "data": [ - { - "index": 0, - "score": 0.98291015625, - "input": "%s", - "object": "rank_result" - }, - { - "index": 2, - "score": 0.61962890625, - "input": "%s", - "object": "rank_result" - 
}, - { - "index": 3, - "score": 0.3642578125, - "input": "%s", - "object": "rank_result" - } - ], - "object": "list", - "top_k": 3, - "return_input": false - } - """, HARPER_LEE, NOVEL_BY_HARPER_LEE, JANE_AUSTEN); + { + "index": 2, + "score": 0.61962890625, + "input": "%s", + "object": "rank_result" + }, + { + "index": 3, + "score": 0.3642578125, + "input": "%s", + "object": "rank_result" + } + ], + "object": "list", + "top_k": 3, + "return_input": false + } + """, HARPER_LEE, NOVEL_BY_HARPER_LEE, JANE_AUSTEN); } From 6b63ffbc4fe272a8f6a81262c03616c7c68ee651 Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Wed, 14 Jan 2026 08:49:37 +0100 Subject: [PATCH 04/48] Add Mixedbread AI Rerank support --- .../mixedbread/MixedbreadConstants.java | 8 +- .../services/mixedbread/MixedbreadModel.java | 11 +- .../mixedbread/MixedbreadResponseHandler.java | 68 ---------- .../mixedbread/MixedbreadService.java | 4 +- .../mixedbread/request/MixedbreadRequest.java | 13 +- .../request/MixedbreadRerankRequest.java | 10 +- .../rerank/MixedbreadRerankModel.java | 35 ++++- .../MixedbreadRerankServiceSettings.java | 21 ++- .../MixedbreadRerankResponseEntity.java | 2 +- .../mixedbread/MixedbreadServiceTests.java | 126 ++++++++++++++++++ .../request/MixedbreadRerankRequestTests.java | 2 +- .../rerank/MixedbreadRerankModelTests.java | 78 ++++++++++- .../MixedbreadRerankServiceSettingsTests.java | 6 +- 13 files changed, 280 insertions(+), 104 deletions(-) delete mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadResponseHandler.java create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java index 
033f800fc8d6b..2f7cca8f40564 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java @@ -8,19 +8,19 @@ package org.elasticsearch.xpack.inference.services.mixedbread; public class MixedbreadConstants { - public static final String RERANK_URI_PATH = "/v1/rerank"; + public static final String VERSION_1 = "v1"; + public static final String RERANK_PATH = "rerank"; // common service settings fields - public static final String API_KEY_FIELD = "api_key"; - public static final String MODEL_FIELD = "model"; - // embeddings service and request settings public static final String INPUT_FIELD = "input"; // rerank task settings fields public static final String QUERY_FIELD = "query"; + public static final String DOCUMENTS_FIELD = "documents"; + // rerank task settings fields public static final String RETURN_DOCUMENTS_FIELD = "return_documents"; public static final String TOP_K_FIELD = "top_k"; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java index 0409dd39b45d7..101314ec9b7d7 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java @@ -31,17 +31,20 @@ public abstract class MixedbreadModel extends RateLimitGroupingModel { private final SecureString apiKey; private final MixedbreadRateLimitServiceSettings rateLimitServiceSettings; + private final URI uri; public MixedbreadModel( ModelConfigurations configurations, ModelSecrets secrets, @Nullable ApiKeySecrets apiKeySecrets, - MixedbreadRateLimitServiceSettings 
rateLimitServiceSettings + MixedbreadRateLimitServiceSettings rateLimitServiceSettings, + URI uri ) { super(configurations, secrets); this.rateLimitServiceSettings = Objects.requireNonNull(rateLimitServiceSettings); apiKey = ServiceUtils.apiKey(apiKeySecrets); + this.uri = uri; } protected MixedbreadModel(MixedbreadModel model, TaskSettings taskSettings) { @@ -49,6 +52,7 @@ protected MixedbreadModel(MixedbreadModel model, TaskSettings taskSettings) { rateLimitServiceSettings = model.rateLimitServiceSettings(); apiKey = model.apiKey(); + uri = model.uri(); } protected MixedbreadModel(MixedbreadModel model, ServiceSettings serviceSettings) { @@ -56,6 +60,7 @@ protected MixedbreadModel(MixedbreadModel model, ServiceSettings serviceSettings rateLimitServiceSettings = model.rateLimitServiceSettings(); apiKey = model.apiKey(); + uri = model.uri(); } public SecureString apiKey() { @@ -68,6 +73,10 @@ public MixedbreadRateLimitServiceSettings rateLimitServiceSettings() { public abstract ExecutableAction accept(MixedbreadActionVisitor creator, Map taskSettings); + public URI uri() { + return uri; + } + public RateLimitSettings rateLimitSettings() { return rateLimitServiceSettings.rateLimitSettings(); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadResponseHandler.java deleted file mode 100644 index b22205050d1ac..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadResponseHandler.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -package org.elasticsearch.xpack.inference.services.mixedbread; - -import org.elasticsearch.inference.InferenceServiceResults; -import org.elasticsearch.xpack.inference.external.http.HttpResult; -import org.elasticsearch.xpack.inference.external.http.retry.BaseResponseHandler; -import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser; -import org.elasticsearch.xpack.inference.external.http.retry.RetryException; -import org.elasticsearch.xpack.inference.external.request.Request; -import org.elasticsearch.xpack.inference.services.cohere.response.CohereErrorResponseEntity; - -import java.util.concurrent.Flow; - -public class MixedbreadResponseHandler extends BaseResponseHandler { - static final String TEXTS_ARRAY_TOO_LARGE_MESSAGE_MATCHER = "invalid request: total number of texts must be at most"; - static final String TEXTS_ARRAY_ERROR_MESSAGE = "Received a texts array too large response"; - - public MixedbreadResponseHandler(String requestType, ResponseParser parseFunction, boolean canHandleStreamingResponse) { - super(requestType, parseFunction, CohereErrorResponseEntity::fromResponse, canHandleStreamingResponse); - } - - @Override - protected void checkForFailureStatusCode(Request request, HttpResult result) throws RetryException { - if (result.isSuccessfulResponse()) { - return; - } - - // handle error codes - int statusCode = result.response().getStatusLine().getStatusCode(); - if (statusCode == 500) { - throw new RetryException(true, buildError(SERVER_ERROR, request, result)); - } else if (statusCode > 500) { - throw new RetryException(false, buildError(SERVER_ERROR, request, result)); - } else if (statusCode == 429) { - throw new RetryException(true, buildError(RATE_LIMIT, request, result)); - } else if (isTextsArrayTooLarge(result)) { - throw new RetryException(false, buildError(TEXTS_ARRAY_ERROR_MESSAGE, request, result)); - } else if (statusCode == 401) { - throw new RetryException(false, buildError(AUTHENTICATION, request, 
result)); - } else if (statusCode >= 300 && statusCode < 400) { - throw new RetryException(false, buildError(REDIRECTION, request, result)); - } else { - throw new RetryException(false, buildError(UNSUCCESSFUL, request, result)); - } - } - - @Override - public InferenceServiceResults parseResult(Request request, Flow.Publisher flow) { - return super.parseResult(request, flow); - } - - private static boolean isTextsArrayTooLarge(HttpResult result) { - int statusCode = result.response().getStatusLine().getStatusCode(); - - if (statusCode == 400) { - var errorEntity = CohereErrorResponseEntity.fromResponse(result); - return errorEntity != null && errorEntity.getErrorMessage().contains(TEXTS_ARRAY_TOO_LARGE_MESSAGE_MATCHER); - } - - return false; - } -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java index ee2e7b0d16065..e638ab3822b10 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java @@ -58,7 +58,7 @@ public class MixedbreadService extends SenderService implements RerankingInferenceService { public static final String NAME = "mixedbread"; - private static final String SERVICE_NAME = "Mixedbread"; + public static final String SERVICE_NAME = "Mixedbread"; private static final EnumSet supportedTaskTypes = EnumSet.of(TaskType.RERANK); public static final EnumSet VALID_INPUT_TYPE_VALUES = EnumSet.of( @@ -204,7 +204,7 @@ public MixedbreadModel parsePersistedConfig(String inferenceEntityId, TaskType t @Override public InferenceServiceConfiguration getConfiguration() { - return org.elasticsearch.xpack.inference.services.cohere.CohereService.Configuration.get(); + return 
Configuration.get(); } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRequest.java index d43d1d4680e33..084bc3f1c25b3 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRequest.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRequest.java @@ -11,6 +11,7 @@ import org.apache.http.client.methods.HttpPost; import org.apache.http.client.utils.URIBuilder; import org.apache.http.entity.ByteArrayEntity; +import org.apache.http.message.BasicHeader; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.common.Strings; import org.elasticsearch.core.Nullable; @@ -19,9 +20,8 @@ import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.inference.external.request.HttpRequest; import org.elasticsearch.xpack.inference.external.request.Request; -import org.elasticsearch.xpack.inference.services.cohere.CohereService; -import org.elasticsearch.xpack.inference.services.cohere.request.CohereUtils; import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadAccount; +import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadService; import java.net.URI; import java.net.URISyntaxException; @@ -33,10 +33,13 @@ public abstract class MixedbreadRequest implements Request, ToXContentObject { + public static final String REQUEST_SOURCE_HEADER = "Request-Source"; + public static final String ELASTIC_REQUEST_SOURCE = "unspecified:elasticsearch"; + public static void decorateWithAuthHeader(HttpPost request, MixedbreadAccount account) { request.setHeader(HttpHeaders.CONTENT_TYPE, XContentType.JSON.mediaType()); request.setHeader(createAuthBearerHeader(account.apiKey())); - 
request.setHeader(CohereUtils.createRequestSourceHeader()); + request.setHeader(new BasicHeader(REQUEST_SOURCE_HEADER, ELASTIC_REQUEST_SOURCE)); } protected final MixedbreadAccount account; @@ -47,7 +50,7 @@ public static void decorateWithAuthHeader(HttpPost request, MixedbreadAccount ac protected MixedbreadRequest(MixedbreadAccount account, String inferenceEntityId, @Nullable String modelId, boolean stream) { this.account = account; this.inferenceEntityId = Objects.requireNonNull(inferenceEntityId); - this.modelId = modelId; // model is optional in the v1 api + this.modelId = modelId; this.stream = stream; } @@ -89,7 +92,7 @@ private URI buildUri(URI baseUri) { return new URIBuilder(baseUri).setPathSegments(pathSegments()).build(); } catch (URISyntaxException e) { throw new ElasticsearchStatusException( - Strings.format("Failed to construct %s URL", CohereService.NAME), + Strings.format("Failed to construct %s URL", MixedbreadService.NAME), RestStatus.BAD_REQUEST, e ); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequest.java index b4d14853d3d4e..96626b3ddf2f7 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequest.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequest.java @@ -9,8 +9,8 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xpack.inference.services.cohere.request.CohereUtils; import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadAccount; +import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants; import 
org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModel; import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettings; @@ -43,16 +43,16 @@ public MixedbreadRerankRequest( @Override protected List pathSegments() { - return List.of(CohereUtils.VERSION_1, CohereUtils.RERANK_PATH); + return List.of(MixedbreadConstants.VERSION_1, MixedbreadConstants.RERANK_PATH); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); - builder.field(CohereUtils.MODEL_FIELD, getModelId()); - builder.field(CohereUtils.QUERY_FIELD, query); - builder.field(CohereUtils.DOCUMENTS_FIELD, input); + builder.field(MixedbreadConstants.MODEL_FIELD, getModelId()); + builder.field(MixedbreadConstants.QUERY_FIELD, query); + builder.field(MixedbreadConstants.DOCUMENTS_FIELD, input); // prefer the root level return_documents over task settings if (returnDocuments != null) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java index a150fccc56943..f5fe26c469310 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java @@ -7,51 +7,74 @@ package org.elasticsearch.xpack.inference.services.mixedbread.rerank; +import org.apache.http.client.utils.URIBuilder; import org.elasticsearch.core.Nullable; import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.ModelSecrets; import org.elasticsearch.inference.TaskType; import org.elasticsearch.xpack.inference.external.action.ExecutableAction; import 
org.elasticsearch.xpack.inference.services.ConfigurationParseContext; +import org.elasticsearch.xpack.inference.services.ServiceUtils; import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadModel; import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadService; import org.elasticsearch.xpack.inference.services.mixedbread.action.MixedbreadActionVisitor; import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings; import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.external.request.RequestUtils.buildUri; public class MixedbreadRerankModel extends MixedbreadModel { + public static final String HOST = "api.mixedbread.com"; + public static final String VERSION_1 = "v1"; + public static final String RERANK_PATH = "reranking"; + + private static final URIBuilder DEFAULT_URI_BUILDER = new URIBuilder().setScheme("https") + .setHost(HOST) + .setPathSegments(VERSION_1, RERANK_PATH); + public static MixedbreadRerankModel of(MixedbreadRerankModel model, Map taskSettings) { var requestTaskSettings = MixedbreadRerankTaskSettings.fromMap(taskSettings); + if (requestTaskSettings.isEmpty() || requestTaskSettings.equals(model.getTaskSettings())) { + return model; + } return new MixedbreadRerankModel(model, MixedbreadRerankTaskSettings.of(model.getTaskSettings(), requestTaskSettings)); } public MixedbreadRerankModel( - String modelId, + String inferenceId, Map serviceSettings, Map taskSettings, @Nullable Map secrets, ConfigurationParseContext context ) { this( - modelId, + inferenceId, MixedbreadRerankServiceSettings.fromMap(serviceSettings, context), MixedbreadRerankTaskSettings.fromMap(taskSettings), - DefaultSecretSettings.fromMap(secrets) + DefaultSecretSettings.fromMap(secrets), + null ); } + // should only be used for testing public MixedbreadRerankModel( String modelId, MixedbreadRerankServiceSettings serviceSettings, MixedbreadRerankTaskSettings taskSettings, - @Nullable 
DefaultSecretSettings secretSettings + @Nullable DefaultSecretSettings secretSettings, + @Nullable String uri ) { super( new ModelConfigurations(modelId, TaskType.RERANK, MixedbreadService.NAME, serviceSettings, taskSettings), new ModelSecrets(secretSettings), secretSettings, - serviceSettings + serviceSettings, + Objects.requireNonNullElse( + ServiceUtils.createOptionalUri(uri), + buildUri(MixedbreadService.SERVICE_NAME, DEFAULT_URI_BUILDER::build) + ) ); } @@ -80,7 +103,7 @@ public DefaultSecretSettings getSecretSettings() { /** * Accepts a visitor to create an executable action. The returned action will not return documents in the response. - * @param visitor Interface for creating {@link ExecutableAction} instances for Cohere models. + * @param visitor Interface for creating {@link ExecutableAction} instances for Mixedbread models. * @param taskSettings Settings in the request to override the model's defaults * @return the rerank action */ diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java index 3265fc87f17b6..40a1ce6a4a7a4 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java @@ -27,6 +27,9 @@ import java.util.Objects; import static org.elasticsearch.xpack.inference.services.ServiceFields.MODEL_ID; +import static org.elasticsearch.xpack.inference.services.ServiceFields.URL; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.convertToUri; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.createOptionalUri; import static 
org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalString; public class MixedbreadRerankServiceSettings extends FilteredXContentObject implements ServiceSettings, MixedbreadRateLimitServiceSettings { @@ -49,6 +52,9 @@ public class MixedbreadRerankServiceSettings extends FilteredXContentObject impl public static MixedbreadRerankServiceSettings fromMap(Map map, ConfigurationParseContext context) { ValidationException validationException = new ValidationException(); + String url = extractOptionalString(map, URL, ModelConfigurations.SERVICE_SETTINGS, validationException); + + URI uri = convertToUri(url, URL, ModelConfigurations.SERVICE_SETTINGS, validationException); String model = extractOptionalString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException); RateLimitSettings rateLimitSettings = RateLimitSettings.of( map, @@ -62,21 +68,24 @@ public static MixedbreadRerankServiceSettings fromMap(Map map, C throw validationException; } - return new MixedbreadRerankServiceSettings(model, rateLimitSettings); + return new MixedbreadRerankServiceSettings(model, rateLimitSettings, uri); } private final String model; private final RateLimitSettings rateLimitSettings; + private final URI uri; - public MixedbreadRerankServiceSettings(@Nullable String model, @Nullable RateLimitSettings rateLimitSettings) { + public MixedbreadRerankServiceSettings(@Nullable String model, @Nullable RateLimitSettings rateLimitSettings, @Nullable URI uri) { this.model = model; this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS); + this.uri = uri; } public MixedbreadRerankServiceSettings(StreamInput in) throws IOException { this.model = in.readOptionalString(); this.rateLimitSettings = new RateLimitSettings(in); + this.uri = createOptionalUri(in.readOptionalString()); } @Override @@ -91,7 +100,7 @@ public RateLimitSettings rateLimitSettings() { @Override public URI uri() { - return null; + return uri; } @Override @@ 
-112,6 +121,10 @@ protected XContentBuilder toXContentFragmentOfExposedFields(XContentBuilder buil rateLimitSettings.toXContent(builder, params); + if (uri != null) { + builder.field(URL, uri.toString()); + } + return builder; } @@ -130,6 +143,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(model); rateLimitSettings.writeTo(out); + var uriToWrite = uri != null ? uri.toString() : null; + out.writeOptionalString(uriToWrite); } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java index be535e51750db..b259df8d93408 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java @@ -115,7 +115,7 @@ private static List doParse(XContentParser parser) // This should never be reached. 
The above code should either return successfully or hit the throwUnknownToken // or throw a parsing exception - throw new IllegalStateException("Reached an invalid state while parsing the Cohere response"); + throw new IllegalStateException("Reached an invalid state while parsing the Mixedbread response"); } private record RankedDocEntry(Integer index, Float score, @Nullable String text) { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java new file mode 100644 index 0000000000000..f6419cd5ba128 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java @@ -0,0 +1,126 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.mixedbread; + +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.InferenceService; +import org.elasticsearch.inference.InferenceServiceConfiguration; +import org.elasticsearch.inference.RerankingInferenceService; +import org.elasticsearch.test.http.MockWebServer; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.inference.external.http.HttpClientManager; +import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; +import org.elasticsearch.xpack.inference.logging.ThrottlerManager; +import org.elasticsearch.xpack.inference.services.InferenceServiceTestCase; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +import static org.elasticsearch.common.xcontent.XContentHelper.toXContent; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertToXContentEquivalent; +import static org.elasticsearch.xpack.inference.Utils.inferenceUtilityExecutors; +import static org.elasticsearch.xpack.inference.Utils.mockClusterServiceEmpty; +import static org.elasticsearch.xpack.inference.services.ServiceComponentsTests.createWithEmptySettings; +import static org.hamcrest.CoreMatchers.is; +import static org.mockito.Mockito.mock; + +public class MixedbreadServiceTests extends InferenceServiceTestCase { + private static final TimeValue TIMEOUT = new TimeValue(30, TimeUnit.SECONDS); + private final MockWebServer webServer = new MockWebServer(); + private ThreadPool threadPool; + private HttpClientManager clientManager; + + @Before + public void init() throws 
Exception { + webServer.start(); + threadPool = createThreadPool(inferenceUtilityExecutors()); + clientManager = HttpClientManager.create(Settings.EMPTY, threadPool, mockClusterServiceEmpty(), mock(ThrottlerManager.class)); + } + + @After + public void shutdown() throws IOException { + clientManager.close(); + terminate(threadPool); + webServer.close(); + } + + public void testGetConfiguration() throws Exception { + try (var service = createMixedbreadService()) { + String content = XContentHelper.stripWhitespace(""" + { + "service": "mixedbread", + "name": "Mixedbread", + "task_types": ["rerank"], + "configurations": { + "api_key": { + "description": "API Key for the provider you're connecting to.", + "label": "API Key", + "required": true, + "sensitive": true, + "updatable": true, + "type": "str", + "supported_task_types": ["rerank"] + }, + "model_id": { + "description": "The name of the model to use for the inference task.", + "label": "Model ID", + "required": true, + "sensitive": false, + "updatable": false, + "type": "str", + "supported_task_types": ["rerank"] + }, + "rate_limit.requests_per_minute": { + "description": "Minimize the number of rate limit errors.", + "label": "Rate Limit", + "required": false, + "sensitive": false, + "updatable": false, + "type": "int", + "supported_task_types": ["rerank"] + } + } + } + """); + InferenceServiceConfiguration configuration = InferenceServiceConfiguration.fromXContentBytes( + new BytesArray(content), + XContentType.JSON + ); + boolean humanReadable = true; + BytesReference originalBytes = toShuffledXContent(configuration, XContentType.JSON, ToXContent.EMPTY_PARAMS, humanReadable); + InferenceServiceConfiguration serviceConfiguration = service.getConfiguration(); + assertToXContentEquivalent( + originalBytes, + toXContent(serviceConfiguration, XContentType.JSON, humanReadable), + XContentType.JSON + ); + } + } + + private MixedbreadService createMixedbreadService() { + return new 
MixedbreadService(mock(HttpRequestSender.Factory.class), createWithEmptySettings(threadPool), mockClusterServiceEmpty()); + } + + @Override + public InferenceService createInferenceService() { + return createMixedbreadService(); + } + + @Override + protected void assertRerankerWindowSize(RerankingInferenceService rerankingInferenceService) { + assertThat(rerankingInferenceService.rerankerWindowSize("any model"), is(2800)); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java index cfa3e7efd5e7b..cf79e6bc64e85 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java @@ -67,7 +67,7 @@ private static MixedbreadRerankRequest createRequest( @Nullable Integer topN, @Nullable Boolean returnDocuments ) { - var rerankModel = MixedbreadRerankModelTests.createModel(modelId, API_KEY); + var rerankModel = MixedbreadRerankModelTests.createModel(modelId, API_KEY, null, null, null); return new MixedbreadRerankRequest(query, List.of(input), returnDocuments, topN, rerankModel); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java index de90a46b457be..023ec02f74969 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java @@ -8,15 +8,83 @@ package org.elasticsearch.xpack.inference.services.mixedbread.rerank; import org.elasticsearch.common.settings.SecureString; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings; -public class MixedbreadRerankModelTests { - public static MixedbreadRerankModel createModel(String model, String apiKey) { +import java.util.Map; + +import static org.elasticsearch.xpack.inference.services.jinaai.rerank.JinaAIRerankTaskSettingsTests.getTaskSettingsMap; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.sameInstance; + +public class MixedbreadRerankModelTests extends ESTestCase { + + public static final String DEFAULT_URL = "https://api.mixedbread.com/v1/reranking"; + public static final String CUSTOM_URL = "https://custom.url.com/v1/reranking"; + public static final String MODEL_ID = "model_id"; + public static final String API_KEY = "secret"; + + public void testConstructor_usesDefaultUrlWhenNull() { + var model = createModel(MODEL_ID, API_KEY, null, null, null); + assertThat(model.uri().toString(), is(DEFAULT_URL)); + } + + public void testConstructor_usesUrlWhenSpecified() { + var model = createModel(MODEL_ID, API_KEY, CUSTOM_URL, null, null); + assertThat(model.uri().toString(), is(CUSTOM_URL)); + } + + public void testOf_DoesNotOverrideAndModelRemainsEqual_WhenSettingsAreEmpty() { + var model = createModel(MODEL_ID, API_KEY, null, 10, true); + var overriddenModel = MixedbreadRerankModel.of(model, Map.of()); + assertThat(overriddenModel, sameInstance(model)); + } + + public void testOf_DoesNotOverrideAndModelRemainsEqual_WhenSettingsAreNull() { + var model = createModel(MODEL_ID, API_KEY, null, 10, true); + var overriddenModel = MixedbreadRerankModel.of(model, null); + 
assertThat(overriddenModel, sameInstance(model)); + } + + public void testOf_DoesNotOverrideAndModelRemainsEqual_WhenSettingsAreEqual() { + var topN = randomNonNegativeInt(); + var returnDocuments = randomBoolean(); + var model = createModel(MODEL_ID, API_KEY, null, topN, returnDocuments); + var overriddenModel = MixedbreadRerankModel.of(model, getTaskSettingsMap(topN, returnDocuments)); + assertThat(overriddenModel, sameInstance(model)); + } + + public void testOf_SetsTopN_FromRequestTaskSettings_OverridingStoredTaskSettings() { + var model = createModel(MODEL_ID, API_KEY, null, 15, null); + var topNFromRequest = 10; + var overriddenModel = MixedbreadRerankModel.of(model, getTaskSettingsMap(topNFromRequest, null)); + var expectedModel = createModel(MODEL_ID, API_KEY, null, topNFromRequest, null); + assertThat(overriddenModel, is(expectedModel)); + } + + public void testOf_SetsReturnDocuments_FromRequestTaskSettings() { + var topN = 15; + var model = createModel(MODEL_ID, API_KEY, null, topN, true); + var returnDocumentsFromRequest = false; + var overriddenModel = MixedbreadRerankModel.of(model, getTaskSettingsMap(null, returnDocumentsFromRequest)); + var expectedModel = createModel(MODEL_ID, API_KEY, null, topN, returnDocumentsFromRequest); + assertThat(overriddenModel, is(expectedModel)); + } + + public static MixedbreadRerankModel createModel( + String model, + String apiKey, + String uri, + @Nullable Integer topN, + @Nullable Boolean returnDocuments + ) { return new MixedbreadRerankModel( model, - new MixedbreadRerankServiceSettings(model, null), - new MixedbreadRerankTaskSettings(null, null), - new DefaultSecretSettings(new SecureString(apiKey.toCharArray())) + new MixedbreadRerankServiceSettings(model, null, null), + new MixedbreadRerankTaskSettings(topN, returnDocuments), + new DefaultSecretSettings(new SecureString(apiKey.toCharArray())), + uri ); } } diff --git 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java index 3254952a2d314..538f2990b3e84 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java @@ -31,13 +31,13 @@ public static MixedbreadRerankServiceSettings createRandom() { } public static MixedbreadRerankServiceSettings createRandom(@Nullable RateLimitSettings rateLimitSettings) { - return new MixedbreadRerankServiceSettings(randomAlphaOfLengthOrNull(10), rateLimitSettings); + return new MixedbreadRerankServiceSettings(randomAlphaOfLengthOrNull(10), rateLimitSettings, null); } public void testToXContent_WritesAllValues() throws IOException { var model = "model"; - var serviceSettings = new MixedbreadRerankServiceSettings(model, null); + var serviceSettings = new MixedbreadRerankServiceSettings(model, null, null); XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); serviceSettings.toXContent(builder, null); @@ -73,7 +73,7 @@ protected MixedbreadRerankServiceSettings mutateInstance(MixedbreadRerankService default -> throw new AssertionError("Illegal randomisation branch"); } - return new MixedbreadRerankServiceSettings(modelId, rateLimitSettings); + return new MixedbreadRerankServiceSettings(modelId, rateLimitSettings, null); } public static Map getServiceSettingsMap(@Nullable String url, @Nullable String model) { From 9ec0f7db9f094ebade28d3a81c9bb95bb014049c Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Tue, 20 Jan 2026 15:29:59 +0100 Subject: [PATCH 05/48] Add action creator tests --- 
.../mixedbread/MixedbreadService.java | 22 ++- .../rerank/MixedbreadRerankTaskSettings.java | 4 + .../action/MixedbreadActionCreatorTests.java | 175 ++++++++++++++++++ 3 files changed, 191 insertions(+), 10 deletions(-) create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreatorTests.java diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java index e638ab3822b10..e6ecb035e987b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java @@ -53,6 +53,7 @@ import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMap; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrDefaultEmpty; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap; import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwUnsupportedUnifiedCompletionOperation; public class MixedbreadService extends SenderService implements RerankingInferenceService { @@ -70,11 +71,6 @@ public class MixedbreadService extends SenderService implements RerankingInferen InputType.INTERNAL_SEARCH ); - // TODO Batching - We'll instantiate a batching class within the services that want to support it and pass it through to - // the Cohere*RequestManager via the CohereActionCreator class - // The reason it needs to be done here is that the batching logic needs to hold state but the *RequestManagers are instantiated - // on every request - public MixedbreadService( 
HttpRequestSender.Factory factory, ServiceComponents serviceComponents, @@ -104,7 +100,11 @@ public void parseRequestConfig( Map taskSettingsMap = removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS); ChunkingSettings chunkingSettings = null; - + if (TaskType.TEXT_EMBEDDING.equals(taskType) || TaskType.EMBEDDING.equals(taskType)) { + chunkingSettings = ChunkingSettingsBuilder.fromMap( + removeFromMapOrDefaultEmpty(config, ModelConfigurations.CHUNKING_SETTINGS) + ); + } MixedbreadModel model = createModel( inferenceEntityId, taskType, @@ -115,9 +115,9 @@ public void parseRequestConfig( ConfigurationParseContext.REQUEST ); - // throwIfNotEmptyMap(config, NAME); - // throwIfNotEmptyMap(serviceSettingsMap, NAME); - // throwIfNotEmptyMap(taskSettingsMap, NAME); + throwIfNotEmptyMap(config, NAME); + throwIfNotEmptyMap(serviceSettingsMap, NAME); + throwIfNotEmptyMap(taskSettingsMap, NAME); parsedModelListener.onResponse(model); } catch (Exception e) { @@ -171,7 +171,9 @@ public MixedbreadModel parsePersistedConfigWithSecrets( Map secretSettingsMap = removeFromMapOrThrowIfNull(secrets, ModelSecrets.SECRET_SETTINGS); ChunkingSettings chunkingSettings = null; - + if (TaskType.TEXT_EMBEDDING.equals(taskType) || TaskType.EMBEDDING.equals(taskType)) { + chunkingSettings = ChunkingSettingsBuilder.fromMap(removeFromMap(config, ModelConfigurations.CHUNKING_SETTINGS)); + } return createModelWithoutLoggingDeprecations( inferenceEntityId, taskType, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java index c95515cc05f87..df68e733d08aa 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java @@ -50,6 +50,10 @@ public static MixedbreadRerankTaskSettings fromMap(Map map) { throw validationException; } + if (returnDocuments == null && topNDocumentsOnly == null) { + return EMPTY_SETTINGS; + } + return of(topNDocumentsOnly, returnDocuments); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreatorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreatorTests.java new file mode 100644 index 0000000000000..e059050cac026 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreatorTests.java @@ -0,0 +1,175 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.mixedbread.action; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.http.MockWebServer; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xpack.core.inference.action.InferenceAction; +import org.elasticsearch.xpack.inference.external.action.ExecutableAction; +import org.elasticsearch.xpack.inference.external.http.HttpClientManager; +import org.elasticsearch.xpack.inference.external.http.HttpResult; +import org.elasticsearch.xpack.inference.external.http.sender.QueryAndDocsInputs; +import org.elasticsearch.xpack.inference.external.http.sender.Sender; +import org.elasticsearch.xpack.inference.logging.ThrottlerManager; +import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModelTests; +import org.hamcrest.MatcherAssert; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import static org.elasticsearch.xpack.inference.Utils.inferenceUtilityExecutors; +import static org.elasticsearch.xpack.inference.Utils.mockClusterServiceEmpty; +import static org.elasticsearch.xpack.inference.external.http.Utils.getUrl; +import static org.elasticsearch.xpack.inference.services.ServiceComponentsTests.createWithEmptySettings; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.is; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; + +public class MixedbreadActionCreatorTests extends 
ESTestCase { + private static final TimeValue TIMEOUT = new TimeValue(30, TimeUnit.SECONDS); + private static final QueryAndDocsInputs QUERY_AND_DOCS_INPUTS = new QueryAndDocsInputs( + "popular name", + List.of("Luke"), + false, + 3, + false + ); + private final MockWebServer webServer = new MockWebServer(); + private ThreadPool threadPool; + private HttpClientManager clientManager; + + @Before + public void init() throws Exception { + webServer.start(); + threadPool = createThreadPool(inferenceUtilityExecutors()); + clientManager = HttpClientManager.create(Settings.EMPTY, threadPool, mockClusterServiceEmpty(), mock(ThrottlerManager.class)); + } + + @After + public void shutdown() throws IOException { + clientManager.close(); + terminate(threadPool); + webServer.close(); + } + + public void testExecute_ThrowsURISyntaxException_ForInvalidUrl() throws IOException { + try (var sender = mock(Sender.class)) { + var thrownException = expectThrows( + IllegalArgumentException.class, + () -> createAction("model", "secret", "^^", null, null, sender) + ); + MatcherAssert.assertThat(thrownException.getMessage(), containsString("unable to parse url [^^]")); + } + } + + public void testExecute_ThrowsElasticsearchException() { + var sender = mock(Sender.class); + doThrow(new ElasticsearchException("failed")).when(sender).send(any(), any(), any(), any()); + + var action = createAction("model", "secret", getUrl(webServer), null, null, sender); + ElasticsearchException thrownException = executeActionWithException(action); + + MatcherAssert.assertThat(thrownException.getMessage(), is("failed")); + } + + public void testExecute_ThrowsElasticsearchException_WhenSenderOnFailureIsCalled() { + var sender = mock(Sender.class); + + doAnswer(invocation -> { + ActionListener listener = invocation.getArgument(3); + listener.onFailure(new IllegalStateException("failed")); + + return Void.TYPE; + }).when(sender).send(any(), any(), any(), any()); + + var action = createAction("model", "secret", 
getUrl(webServer), null, null, sender); + ElasticsearchException thrownException = executeActionWithException(action); + + MatcherAssert.assertThat( + thrownException.getMessage(), + is("Failed to send Mixedbread rerank request from inference entity id [model]. Cause: failed") + ); + } + + public void testExecute_ThrowsElasticsearchException_WhenSenderOnFailureIsCalled_WhenUrlIsNull() { + var sender = mock(Sender.class); + + doAnswer(invocation -> { + ActionListener listener = invocation.getArgument(3); + listener.onFailure(new IllegalStateException("failed")); + + return Void.TYPE; + }).when(sender).send(any(), any(), any(), any()); + + var action = createAction("model", "secret", null, null, null, sender); + ElasticsearchException thrownException = executeActionWithException(action); + + MatcherAssert.assertThat( + thrownException.getMessage(), + is("Failed to send Mixedbread rerank request from inference entity id [model]. Cause: failed") + ); + } + + public void testExecute_ThrowsException() { + var sender = mock(Sender.class); + doThrow(new IllegalArgumentException("failed")).when(sender).send(any(), any(), any(), any()); + + var action = createAction("model", "secret", getUrl(webServer), null, null, sender); + ElasticsearchException thrownException = executeActionWithException(action); + + MatcherAssert.assertThat( + thrownException.getMessage(), + is("Failed to send Mixedbread rerank request from inference entity id [model]. Cause: failed") + ); + } + + public void testExecute_ThrowsExceptionWithNullUrl() { + var sender = mock(Sender.class); + doThrow(new IllegalArgumentException("failed")).when(sender).send(any(), any(), any(), any()); + + var action = createAction("model", "secret", null, null, null, sender); + var thrownException = executeActionWithException(action); + + MatcherAssert.assertThat( + thrownException.getMessage(), + is("Failed to send Mixedbread rerank request from inference entity id [model]. 
Cause: failed") + ); + } + + private static ElasticsearchException executeActionWithException(ExecutableAction action) { + PlainActionFuture listener = new PlainActionFuture<>(); + action.execute(QUERY_AND_DOCS_INPUTS, InferenceAction.Request.DEFAULT_TIMEOUT, listener); + return expectThrows(ElasticsearchException.class, () -> listener.actionGet(TIMEOUT)); + } + + private ExecutableAction createAction( + String modelName, + String apiKey, + String url, + Integer topN, + Boolean returnDocuments, + Sender sender + ) { + var actionCreator = new MixedbreadActionCreator(sender, createWithEmptySettings(threadPool)); + var model = MixedbreadRerankModelTests.createModel(modelName, apiKey, url, topN, returnDocuments); + return actionCreator.create(model, null); + } +} From 4cb12db4d523d2e6b511ee00b08707739b31d18b Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Thu, 22 Jan 2026 21:06:14 +0100 Subject: [PATCH 06/48] Make window size configurable --- .../MixedbreadRateLimitServiceSettings.java | 2 + .../mixedbread/MixedbreadService.java | 24 +++++++++-- .../MixedbreadRerankServiceSettings.java | 20 +++++++++- .../mixedbread/MixedbreadServiceTests.java | 2 +- .../rerank/MixedbreadRerankModelTests.java | 2 +- .../MixedbreadRerankServiceSettingsTests.java | 40 ++++++++++++++----- 6 files changed, 72 insertions(+), 18 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java index 6bbd42dea100b..2f36d7fa82b93 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java @@ -15,4 +15,6 @@ public interface
MixedbreadRateLimitServiceSettings { RateLimitSettings rateLimitSettings(); URI uri(); + + Integer windowSize(); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java index e6ecb035e987b..2df7949c1615c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java @@ -71,6 +71,24 @@ public class MixedbreadService extends SenderService implements RerankingInferen InputType.INTERNAL_SEARCH ); + private static final Map RERANKERS_INPUT_SIZE = Map.of( + "mixedbread-ai/mxbai-rerank-xsmall-v1", + 512, + "mixedbread-ai/mxbai-rerank-base-v1", + 512, + "mixedbread-ai/mxbai-rerank-large-v1", + 512 + // Windows size. + // The v1 models: 512 + // The v2 models: at least 8k + // https://www.mixedbread.com/docs/models/reranking/mxbai-rerank-large-v1 + ); + + /** + * Apart from v1 all other models have a context length of at least 8k. + */ + private static final int DEFAULT_RERANKER_INPUT_SIZE_WORDS = 8000; + public MixedbreadService( HttpRequestSender.Factory factory, ServiceComponents serviceComponents, @@ -273,10 +291,8 @@ public Set supportedStreamingTasks() { @Override public int rerankerWindowSize(String modelId) { - // Cohere rerank model truncates at 4096 tokens https://docs.cohere.com/reference/rerank - // Using 1 token = 0.75 words as a rough estimate, we get 3072 words - // allowing for some headroom, we set the window size below 3072 - return 2800; + Integer inputSize = RERANKERS_INPUT_SIZE.get(modelId); + return inputSize != null ? 
inputSize : DEFAULT_RERANKER_INPUT_SIZE_WORDS; } public static class Configuration { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java index 40a1ce6a4a7a4..241f7db4aa3c6 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java @@ -30,11 +30,13 @@ import static org.elasticsearch.xpack.inference.services.ServiceFields.URL; import static org.elasticsearch.xpack.inference.services.ServiceUtils.convertToUri; import static org.elasticsearch.xpack.inference.services.ServiceUtils.createOptionalUri; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalInteger; import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalString; public class MixedbreadRerankServiceSettings extends FilteredXContentObject implements ServiceSettings, MixedbreadRateLimitServiceSettings { public static final String NAME = "mixedbread_ai_rerank_service_settings"; + public static final String WINDOWS_SIZE = "windows_size"; /** * Applied different rate limits based on the type of operation performed: @@ -48,6 +50,7 @@ public class MixedbreadRerankServiceSettings extends FilteredXContentObject impl * Rate Limiting. 
*/ private static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(240); + private static final Integer DEFAULT_WINDOWS_SIZE = 8000; public static MixedbreadRerankServiceSettings fromMap(Map map, ConfigurationParseContext context) { ValidationException validationException = new ValidationException(); @@ -56,6 +59,7 @@ public static MixedbreadRerankServiceSettings fromMap(Map map, C URI uri = convertToUri(url, URL, ModelConfigurations.SERVICE_SETTINGS, validationException); String model = extractOptionalString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException); + Integer windowsSize = extractOptionalInteger(map, WINDOWS_SIZE, ModelConfigurations.SERVICE_SETTINGS, validationException); RateLimitSettings rateLimitSettings = RateLimitSettings.of( map, DEFAULT_RATE_LIMIT_SETTINGS, @@ -68,24 +72,28 @@ public static MixedbreadRerankServiceSettings fromMap(Map map, C throw validationException; } - return new MixedbreadRerankServiceSettings(model, rateLimitSettings, uri); + return new MixedbreadRerankServiceSettings(model, rateLimitSettings, uri, windowsSize); } private final String model; private final RateLimitSettings rateLimitSettings; private final URI uri; + private final Integer windowsSize; - public MixedbreadRerankServiceSettings(@Nullable String model, @Nullable RateLimitSettings rateLimitSettings, @Nullable URI uri) { + public MixedbreadRerankServiceSettings( + @Nullable String model, @Nullable RateLimitSettings rateLimitSettings, @Nullable URI uri, @Nullable Integer windowsSize) { this.model = model; this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS); this.uri = uri; + this.windowsSize = Objects.requireNonNullElse(windowsSize, DEFAULT_WINDOWS_SIZE); } public MixedbreadRerankServiceSettings(StreamInput in) throws IOException { this.model = in.readOptionalString(); this.rateLimitSettings = new RateLimitSettings(in); this.uri = 
createOptionalUri(in.readOptionalString()); + this.windowsSize = in.readOptionalInt(); } @Override @@ -103,6 +111,11 @@ public URI uri() { return uri; } + @Override + public Integer windowSize() { + return windowsSize; + } + @Override public String getWriteableName() { return NAME; @@ -125,6 +138,8 @@ protected XContentBuilder toXContentFragmentOfExposedFields(XContentBuilder buil builder.field(URL, uri.toString()); } + builder.field(WINDOWS_SIZE, windowsSize); + return builder; } @@ -145,6 +160,7 @@ public void writeTo(StreamOutput out) throws IOException { rateLimitSettings.writeTo(out); var uriToWrite = uri != null ? uri.toString() : null; out.writeOptionalString(uriToWrite); + out.writeOptionalInt(windowsSize); } @Override diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java index f6419cd5ba128..350fb8750ded7 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java @@ -121,6 +121,6 @@ public InferenceService createInferenceService() { @Override protected void assertRerankerWindowSize(RerankingInferenceService rerankingInferenceService) { - assertThat(rerankingInferenceService.rerankerWindowSize("any model"), is(2800)); + assertThat(rerankingInferenceService.rerankerWindowSize("any model"), is(8000)); } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java index 023ec02f74969..aaca3677346cd 100644 --- 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java @@ -81,7 +81,7 @@ public static MixedbreadRerankModel createModel( ) { return new MixedbreadRerankModel( model, - new MixedbreadRerankServiceSettings(model, null, null), + new MixedbreadRerankServiceSettings(model, null, null, null), new MixedbreadRerankTaskSettings(topN, returnDocuments), new DefaultSecretSettings(new SecureString(apiKey.toCharArray())), uri diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java index 538f2990b3e84..6538bb2025d21 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java @@ -19,40 +19,60 @@ import org.elasticsearch.xpack.inference.services.settings.RateLimitSettingsTests; import java.io.IOException; +import java.net.URI; import java.util.HashMap; import java.util.Map; import static org.elasticsearch.xpack.inference.MatchersUtils.equalToIgnoringWhitespaceInJsonString; public class MixedbreadRerankServiceSettingsTests extends AbstractWireSerializingTestCase { + private static final String MODEL = "model"; + private static final RateLimitSettings RATE_LIMIT = new RateLimitSettings(2); + private static final Integer WINDOWS_SIZE = 512; + private static final URI URI = java.net.URI.create("uri"); public static MixedbreadRerankServiceSettings createRandom() { return createRandom(randomFrom(new 
RateLimitSettings[] { null, RateLimitSettingsTests.createRandom() })); } public static MixedbreadRerankServiceSettings createRandom(@Nullable RateLimitSettings rateLimitSettings) { - return new MixedbreadRerankServiceSettings(randomAlphaOfLengthOrNull(10), rateLimitSettings, null); + return new MixedbreadRerankServiceSettings(randomAlphaOfLengthOrNull(10), rateLimitSettings, null, null); } public void testToXContent_WritesAllValues() throws IOException { - var model = "model"; + var serviceSettings = new MixedbreadRerankServiceSettings(MODEL, RATE_LIMIT, URI, WINDOWS_SIZE); + assertThat(getXContentResult(serviceSettings), equalToIgnoringWhitespaceInJsonString(""" + { + "model_id":"model", + "rate_limit": { + "requests_per_minute": 2 + }, + "url": "uri", + "windows_size": 512 + } + """)); + } - var serviceSettings = new MixedbreadRerankServiceSettings(model, null, null); - XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); - serviceSettings.toXContent(builder, null); - String xContentResult = Strings.toString(builder); - - assertThat(xContentResult, equalToIgnoringWhitespaceInJsonString(""" + public void testToXContent_DoesNotWriteOptionalValues_DefaultRateLimit_And_DefaultWindowsSize() throws IOException { + var serviceSettings = new MixedbreadRerankServiceSettings(MODEL, null, null, null); + assertThat(getXContentResult(serviceSettings), equalToIgnoringWhitespaceInJsonString(""" { "model_id":"model", "rate_limit": { "requests_per_minute": 240 - } + }, + "windows_size": 8000 } """)); } + private String getXContentResult(MixedbreadRerankServiceSettings serviceSettings) throws IOException { + XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); + serviceSettings.toXContent(builder, null); + return Strings.toString(builder); + } + @Override protected Writeable.Reader instanceReader() { return MixedbreadRerankServiceSettings::new; @@ -73,7 +93,7 @@ protected MixedbreadRerankServiceSettings 
mutateInstance(MixedbreadRerankService default -> throw new AssertionError("Illegal randomisation branch"); } - return new MixedbreadRerankServiceSettings(modelId, rateLimitSettings, null); + return new MixedbreadRerankServiceSettings(modelId, rateLimitSettings, null, null); } public static Map getServiceSettingsMap(@Nullable String url, @Nullable String model) { From 18d5ce4787c9108d6ba6938c80abf377d9b4143f Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Thu, 22 Jan 2026 21:12:44 +0100 Subject: [PATCH 07/48] Address comments and add service tests --- .../mixedbread/MixedbreadAccount.java | 40 -- .../mixedbread/MixedbreadConstants.java | 2 +- .../services/mixedbread/MixedbreadModel.java | 14 +- .../MixedbreadRateLimitServiceSettings.java | 4 - .../mixedbread/MixedbreadService.java | 106 ++-- .../services/mixedbread/MixedbreadUtils.java | 37 ++ .../action/MixedbreadActionCreator.java | 34 +- .../mixedbread/request/MixedbreadRequest.java | 117 ---- .../request/MixedbreadRerankRequest.java | 76 +-- .../rerank/MixedbreadRerankModel.java | 4 +- .../MixedbreadRerankRequestTaskSettings.java | 48 -- .../MixedbreadRerankResponseHandler.java | 8 +- .../MixedbreadRerankServiceSettings.java | 56 +- .../rerank/MixedbreadRerankTaskSettings.java | 10 +- .../rerank/MixedbreadResponseHandler.java | 70 +++ .../response/MixedbreadErrorResponse.java | 29 - .../MixedbreadRerankResponseEntity.java | 4 +- .../mixedbread/MixedbreadServiceTests.java | 505 +++++++++++++++++- .../action/MixedbreadActionCreatorTests.java | 21 +- .../MixedbreadRerankRequestEntityTests.java | 8 +- .../request/MixedbreadRerankRequestTests.java | 13 +- .../rerank/MixedbreadRerankModelTests.java | 8 +- .../MixedbreadRerankServiceSettingsTests.java | 29 +- .../MixedbreadRerankTaskSettingsTests.java | 6 +- 24 files changed, 776 insertions(+), 473 deletions(-) delete mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadAccount.java create mode 100644 
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadUtils.java delete mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRequest.java delete mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankRequestTaskSettings.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadResponseHandler.java delete mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadErrorResponse.java diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadAccount.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadAccount.java deleted file mode 100644 index dc795efa80b7b..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadAccount.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -package org.elasticsearch.xpack.inference.services.mixedbread; - -import org.apache.http.client.utils.URIBuilder; -import org.elasticsearch.ElasticsearchStatusException; -import org.elasticsearch.common.Strings; -import org.elasticsearch.common.settings.SecureString; -import org.elasticsearch.rest.RestStatus; - -import java.net.URI; -import java.net.URISyntaxException; -import java.util.Objects; - -public record MixedbreadAccount(URI baseUri, SecureString apiKey) { - - public static MixedbreadAccount of(MixedbreadModel model) { - try { - var uri = model.baseUri() != null ? model.baseUri() : new URIBuilder().setScheme("https").setHost("api.mixedbread.com").build(); - return new MixedbreadAccount(uri, model.apiKey()); - } catch (URISyntaxException e) { - // using bad request here so that potentially sensitive URL information does not get logged - throw new ElasticsearchStatusException( - Strings.format("Failed to construct %s URL", MixedbreadService.NAME), - RestStatus.BAD_REQUEST, - e - ); - } - } - - public MixedbreadAccount { - Objects.requireNonNull(baseUri); - Objects.requireNonNull(apiKey); - } -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java index 2f7cca8f40564..6fa1fed2efd22 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java @@ -22,7 +22,7 @@ public class MixedbreadConstants { public static final String DOCUMENTS_FIELD = "documents"; // rerank task settings fields - public static final String RETURN_DOCUMENTS_FIELD = "return_documents"; + public static final String RETURN_DOCUMENTS_FIELD = "return_input"; public static final String TOP_K_FIELD = "top_k"; 
private MixedbreadConstants() {} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java index 101314ec9b7d7..b7fc69095350b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java @@ -21,6 +21,7 @@ import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; import java.net.URI; +import java.net.URISyntaxException; import java.util.Map; import java.util.Objects; @@ -31,7 +32,7 @@ public abstract class MixedbreadModel extends RateLimitGroupingModel { private final SecureString apiKey; private final MixedbreadRateLimitServiceSettings rateLimitServiceSettings; - private final URI uri; + protected URI uri; public MixedbreadModel( ModelConfigurations configurations, @@ -85,7 +86,16 @@ public int rateLimitGroupingHash() { return apiKey().hashCode(); } + // Needed for testing only + public void setURI(String newUri) { + try { + this.uri = new URI(newUri); + } catch (URISyntaxException e) { + // swallow any error + } + } + public URI baseUri() { - return rateLimitServiceSettings.uri(); + return uri; } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java index 2f36d7fa82b93..dfc23253ef23c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java 
@@ -9,12 +9,8 @@ import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; -import java.net.URI; - public interface MixedbreadRateLimitServiceSettings { RateLimitSettings rateLimitSettings(); - URI uri(); - Integer windowSize(); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java index 2df7949c1615c..3a6f41be92b12 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java @@ -10,6 +10,7 @@ import org.elasticsearch.TransportVersion; import org.elasticsearch.action.ActionListener; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.ValidationException; import org.elasticsearch.common.util.LazyInitializable; import org.elasticsearch.core.Nullable; @@ -28,7 +29,6 @@ import org.elasticsearch.inference.SettingsConfiguration; import org.elasticsearch.inference.TaskType; import org.elasticsearch.inference.configuration.SettingsConfigurationFieldType; -import org.elasticsearch.xpack.core.inference.chunking.ChunkingSettingsBuilder; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput; @@ -45,31 +45,22 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Set; import static org.elasticsearch.xpack.inference.services.ServiceFields.MODEL_ID; import static org.elasticsearch.xpack.inference.services.ServiceUtils.createInvalidModelException; import static 
org.elasticsearch.xpack.inference.services.ServiceUtils.createInvalidTaskTypeException; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMap; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrDefaultEmpty; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap; import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwUnsupportedUnifiedCompletionOperation; +/** + * Mixedbread inference service for reranking tasks. + * This service uses the Mixedbread REST API to perform document reranking. + */ public class MixedbreadService extends SenderService implements RerankingInferenceService { public static final String NAME = "mixedbread"; - public static final String SERVICE_NAME = "Mixedbread"; - private static final EnumSet supportedTaskTypes = EnumSet.of(TaskType.RERANK); - - public static final EnumSet VALID_INPUT_TYPE_VALUES = EnumSet.of( - InputType.INGEST, - InputType.SEARCH, - InputType.CLASSIFICATION, - InputType.CLUSTERING, - InputType.INTERNAL_INGEST, - InputType.INTERNAL_SEARCH - ); + + // private static final TransportVersion MIXEDBREAD_SERVICE = TransportVersion.fromName("mixedbread_service"); + private static final EnumSet SUPPORTED_TASK_TYPES = EnumSet.of(TaskType.RERANK); private static final Map RERANKERS_INPUT_SIZE = Map.of( "mixedbread-ai/mxbai-rerank-xsmall-v1", @@ -114,15 +105,11 @@ public void parseRequestConfig( ActionListener parsedModelListener ) { try { - Map serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); - Map taskSettingsMap = removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS); + Map serviceSettingsMap = ServiceUtils.removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); + Map taskSettingsMap = ServiceUtils.removeFromMapOrDefaultEmpty(config, 
ModelConfigurations.TASK_SETTINGS); ChunkingSettings chunkingSettings = null; - if (TaskType.TEXT_EMBEDDING.equals(taskType) || TaskType.EMBEDDING.equals(taskType)) { - chunkingSettings = ChunkingSettingsBuilder.fromMap( - removeFromMapOrDefaultEmpty(config, ModelConfigurations.CHUNKING_SETTINGS) - ); - } + MixedbreadModel model = createModel( inferenceEntityId, taskType, @@ -133,9 +120,9 @@ public void parseRequestConfig( ConfigurationParseContext.REQUEST ); - throwIfNotEmptyMap(config, NAME); - throwIfNotEmptyMap(serviceSettingsMap, NAME); - throwIfNotEmptyMap(taskSettingsMap, NAME); + ServiceUtils.throwIfNotEmptyMap(config, NAME); + ServiceUtils.throwIfNotEmptyMap(serviceSettingsMap, NAME); + ServiceUtils.throwIfNotEmptyMap(taskSettingsMap, NAME); parsedModelListener.onResponse(model); } catch (Exception e) { @@ -143,7 +130,7 @@ public void parseRequestConfig( } } - private static MixedbreadModel createModelWithoutLoggingDeprecations( + private MixedbreadModel parsePersistedConfigWithSecrets( String inferenceEntityId, TaskType taskType, Map serviceSettings, @@ -171,10 +158,11 @@ private static MixedbreadModel createModel( @Nullable Map secretSettings, ConfigurationParseContext context ) { - return switch (taskType) { - case RERANK -> new MixedbreadRerankModel(inferenceEntityId, serviceSettings, taskSettings, secretSettings, context); - default -> throw createInvalidTaskTypeException(inferenceEntityId, NAME, taskType, context); - }; + if (taskType != TaskType.RERANK) { + throw createInvalidTaskTypeException(inferenceEntityId, NAME, taskType, context); + } + + return new MixedbreadRerankModel(inferenceEntityId, serviceSettings, taskSettings, secretSettings, context); } @Override @@ -184,15 +172,13 @@ public MixedbreadModel parsePersistedConfigWithSecrets( Map config, Map secrets ) { - Map serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); - Map taskSettingsMap = removeFromMapOrDefaultEmpty(config, 
ModelConfigurations.TASK_SETTINGS); - Map secretSettingsMap = removeFromMapOrThrowIfNull(secrets, ModelSecrets.SECRET_SETTINGS); + Map serviceSettingsMap = ServiceUtils.removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); + Map taskSettingsMap = ServiceUtils.removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS); + Map secretSettingsMap = ServiceUtils.removeFromMapOrThrowIfNull(secrets, ModelSecrets.SECRET_SETTINGS); ChunkingSettings chunkingSettings = null; - if (TaskType.TEXT_EMBEDDING.equals(taskType) || TaskType.EMBEDDING.equals(taskType)) { - chunkingSettings = ChunkingSettingsBuilder.fromMap(removeFromMap(config, ModelConfigurations.CHUNKING_SETTINGS)); - } - return createModelWithoutLoggingDeprecations( + + return parsePersistedConfigWithSecrets( inferenceEntityId, taskType, serviceSettingsMap, @@ -204,22 +190,12 @@ public MixedbreadModel parsePersistedConfigWithSecrets( @Override public MixedbreadModel parsePersistedConfig(String inferenceEntityId, TaskType taskType, Map config) { - Map serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); - Map taskSettingsMap = removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS); + Map serviceSettingsMap = ServiceUtils.removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); + Map taskSettingsMap = ServiceUtils.removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS); ChunkingSettings chunkingSettings = null; - if (TaskType.TEXT_EMBEDDING.equals(taskType)) { - chunkingSettings = ChunkingSettingsBuilder.fromMap(removeFromMap(config, ModelConfigurations.CHUNKING_SETTINGS)); - } - return createModelWithoutLoggingDeprecations( - inferenceEntityId, - taskType, - serviceSettingsMap, - taskSettingsMap, - chunkingSettings, - null - ); + return parsePersistedConfigWithSecrets(inferenceEntityId, taskType, serviceSettingsMap, taskSettingsMap, chunkingSettings, null); } @Override @@ -229,7 +205,7 @@ public 
InferenceServiceConfiguration getConfiguration() { @Override public EnumSet supportedTaskTypes() { - return supportedTaskTypes; + return SUPPORTED_TASK_TYPES; } @Override @@ -251,7 +227,12 @@ protected void doChunkedInfer( TimeValue timeout, ActionListener> listener ) { + throw new UnsupportedOperationException(Strings.format("%s service does not support chunked inference", NAME)); + } + @Override + protected boolean supportsChunkedInfer() { + return false; } @Override @@ -275,20 +256,13 @@ public void doInfer( } @Override - protected void validateInputType(InputType inputType, Model model, ValidationException validationException) { - ServiceUtils.validateInputTypeAgainstAllowlist(inputType, VALID_INPUT_TYPE_VALUES, SERVICE_NAME, validationException); - } + protected void validateInputType(InputType inputType, Model model, ValidationException validationException) {} @Override public TransportVersion getMinimalSupportedVersion() { return TransportVersion.minimumCompatible(); } - @Override - public Set supportedStreamingTasks() { - return COMPLETION_ONLY; - } - @Override public int rerankerWindowSize(String modelId) { Integer inputSize = RERANKERS_INPUT_SIZE.get(modelId); @@ -306,9 +280,7 @@ public static InferenceServiceConfiguration get() { configurationMap.put( MODEL_ID, - new SettingsConfiguration.Builder(supportedTaskTypes).setDescription( - "The name of the model to use for the inference task." 
- ) + new SettingsConfiguration.Builder(SUPPORTED_TASK_TYPES).setDescription("The model ID to use for Mixedbread requests.") .setLabel("Model ID") .setRequired(true) .setSensitive(false) @@ -317,12 +289,12 @@ public static InferenceServiceConfiguration get() { .build() ); - configurationMap.putAll(DefaultSecretSettings.toSettingsConfiguration(supportedTaskTypes)); - configurationMap.putAll(RateLimitSettings.toSettingsConfiguration(supportedTaskTypes)); + configurationMap.putAll(DefaultSecretSettings.toSettingsConfiguration(SUPPORTED_TASK_TYPES)); + configurationMap.putAll(RateLimitSettings.toSettingsConfiguration(SUPPORTED_TASK_TYPES)); return new InferenceServiceConfiguration.Builder().setService(NAME) .setName(SERVICE_NAME) - .setTaskTypes(supportedTaskTypes) + .setTaskTypes(SUPPORTED_TASK_TYPES) .setConfigurations(configurationMap) .build(); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadUtils.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadUtils.java new file mode 100644 index 0000000000000..38d363d75b5e2 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadUtils.java @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.mixedbread; + +import org.elasticsearch.TransportVersion; + +/** + * Utility class for Mixedbread related version checks. + */ +public final class MixedbreadUtils { + + /** + * TransportVersion indicating when Mixedbread features were added. 
+ */ + public static final TransportVersion ML_INFERENCE_MIXEDBREAD_ADDED = TransportVersion.fromName("ml_inference_mixedbread_added"); + + /** + * Checks if the given TransportVersion supports Mixedbread features. + * + * @param version the TransportVersion to check + * @return true if Mixedbread features are supported, false otherwise + */ + public static boolean supportsMixedbread(TransportVersion version) { + return version.supports(ML_INFERENCE_MIXEDBREAD_ADDED); + } + + /** + * Private constructor to prevent instantiation. + */ + private MixedbreadUtils() {} + +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreator.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreator.java index df07599bd1572..e01e3e33991cd 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreator.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreator.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.inference.services.mixedbread.action; -import org.elasticsearch.inference.TaskType; import org.elasticsearch.xpack.inference.external.action.ExecutableAction; import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction; import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler; @@ -23,23 +22,15 @@ import java.util.Map; import java.util.Objects; -import static org.elasticsearch.core.Strings.format; +import static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage; public class MixedbreadActionCreator implements MixedbreadActionVisitor { - private static final String FAILED_TO_SEND_REQUEST_ERROR_MESSAGE = "Failed to send Mixedbread %s request from inference entity id [%s]"; - private static final 
String INVALID_REQUEST_TYPE_MESSAGE = "Invalid request type: expected Mixedbread %s request but got %s"; + private static final String RERANK_ERROR_PREFIX = "Mixedbread rerank"; - private static final ResponseHandler RERANK_HANDLER = new MixedbreadRerankResponseHandler("mixedbread rerank", (request, response) -> { - if ((request instanceof MixedbreadRerankRequest) == false) { - var errorMessage = format( - INVALID_REQUEST_TYPE_MESSAGE, - "RERANK", - request != null ? request.getClass().getSimpleName() : "null" - ); - throw new IllegalArgumentException(errorMessage); - } - return MixedbreadRerankResponseEntity.fromResponse(response); - }); + private static final ResponseHandler RERANK_HANDLER = new MixedbreadRerankResponseHandler( + "mixedbread rerank", + (request, response) -> MixedbreadRerankResponseEntity.fromResponse(response) + ); private final Sender sender; private final ServiceComponents serviceComponents; @@ -71,18 +62,7 @@ public ExecutableAction create(MixedbreadRerankModel model, Map ), QueryAndDocsInputs.class ); - var errorMessage = buildErrorMessage(TaskType.RERANK, model.getInferenceEntityId()); + var errorMessage = constructFailedToSendRequestMessage(RERANK_ERROR_PREFIX); return new SenderExecutableAction(sender, manager, errorMessage); } - - /** - * Builds an error message for failed requests. 
- * - * @param requestType the type of request that failed - * @param inferenceId the inference entity ID associated with the request - * @return a formatted error message - */ - public static String buildErrorMessage(TaskType requestType, String inferenceId) { - return format(FAILED_TO_SEND_REQUEST_ERROR_MESSAGE, requestType.toString(), inferenceId); - } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRequest.java deleted file mode 100644 index 084bc3f1c25b3..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRequest.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -package org.elasticsearch.xpack.inference.services.mixedbread.request; - -import org.apache.http.HttpHeaders; -import org.apache.http.client.methods.HttpPost; -import org.apache.http.client.utils.URIBuilder; -import org.apache.http.entity.ByteArrayEntity; -import org.apache.http.message.BasicHeader; -import org.elasticsearch.ElasticsearchStatusException; -import org.elasticsearch.common.Strings; -import org.elasticsearch.core.Nullable; -import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.xcontent.ToXContentObject; -import org.elasticsearch.xcontent.XContentType; -import org.elasticsearch.xpack.inference.external.request.HttpRequest; -import org.elasticsearch.xpack.inference.external.request.Request; -import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadAccount; -import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadService; - -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Objects; - -import static org.elasticsearch.xpack.inference.external.request.RequestUtils.createAuthBearerHeader; - -public abstract class MixedbreadRequest implements Request, ToXContentObject { - - public static final String REQUEST_SOURCE_HEADER = "Request-Source"; - public static final String ELASTIC_REQUEST_SOURCE = "unspecified:elasticsearch"; - - public static void decorateWithAuthHeader(HttpPost request, MixedbreadAccount account) { - request.setHeader(HttpHeaders.CONTENT_TYPE, XContentType.JSON.mediaType()); - request.setHeader(createAuthBearerHeader(account.apiKey())); - request.setHeader(new BasicHeader(REQUEST_SOURCE_HEADER, ELASTIC_REQUEST_SOURCE)); - } - - protected final MixedbreadAccount account; - private final String inferenceEntityId; - private final String modelId; - private final boolean stream; - - protected MixedbreadRequest(MixedbreadAccount account, String inferenceEntityId, @Nullable String modelId, boolean stream) { - 
this.account = account; - this.inferenceEntityId = Objects.requireNonNull(inferenceEntityId); - this.modelId = modelId; - this.stream = stream; - } - - @Override - public HttpRequest createHttpRequest() { - HttpPost httpPost = new HttpPost(getURI()); - - ByteArrayEntity byteEntity = new ByteArrayEntity(Strings.toString(this).getBytes(StandardCharsets.UTF_8)); - httpPost.setEntity(byteEntity); - - decorateWithAuthHeader(httpPost, account); - - return new HttpRequest(httpPost, getInferenceEntityId()); - } - - @Override - public String getInferenceEntityId() { - return inferenceEntityId; - } - - @Override - public boolean isStreaming() { - return stream; - } - - @Override - public URI getURI() { - return buildUri(account.baseUri()); - } - - /** - * Returns the URL path segments. - * @return List of segments that make up the path of the request. - */ - protected abstract List pathSegments(); - - private URI buildUri(URI baseUri) { - try { - return new URIBuilder(baseUri).setPathSegments(pathSegments()).build(); - } catch (URISyntaxException e) { - throw new ElasticsearchStatusException( - Strings.format("Failed to construct %s URL", MixedbreadService.NAME), - RestStatus.BAD_REQUEST, - e - ); - } - } - - public String getModelId() { - return modelId; - } - - @Override - public Request truncate() { - // no truncation - return this; - } - - @Override - public boolean[] getTruncationInfo() { - // no truncation - return null; - } -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequest.java index 96626b3ddf2f7..f9d6e6fdf1126 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequest.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequest.java @@ -7,18 +7,26 @@ package org.elasticsearch.xpack.inference.services.mixedbread.request; +import org.apache.http.HttpHeaders; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.ByteArrayEntity; +import org.elasticsearch.common.Strings; import org.elasticsearch.core.Nullable; -import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadAccount; -import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.inference.external.request.HttpRequest; +import org.elasticsearch.xpack.inference.external.request.Request; import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModel; import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettings; -import java.io.IOException; +import java.net.URI; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Objects; -public class MixedbreadRerankRequest extends MixedbreadRequest { +import static org.elasticsearch.xpack.inference.external.request.RequestUtils.createAuthBearerHeader; + +public class MixedbreadRerankRequest implements Request { + private final MixedbreadRerankModel model; private final String query; private final List input; private final Boolean returnDocuments; @@ -32,47 +40,49 @@ public MixedbreadRerankRequest( @Nullable Integer topN, MixedbreadRerankModel model ) { - super(MixedbreadAccount.of(model), model.getInferenceEntityId(), model.getServiceSettings().modelId(), false); - this.input = Objects.requireNonNull(input); this.query = Objects.requireNonNull(query); this.returnDocuments = returnDocuments; this.topN = topN; taskSettings = model.getTaskSettings(); + this.model = Objects.requireNonNull(model); } - 
@Override - protected List pathSegments() { - return List.of(MixedbreadConstants.VERSION_1, MixedbreadConstants.RERANK_PATH); - } + public HttpRequest createHttpRequest() { + HttpPost httpPost = new HttpPost(model.uri()); - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(); + ByteArrayEntity byteEntity = new ByteArrayEntity( + Strings.toString( + new MixedbreadRerankRequestEntity(model.getServiceSettings().modelId(), query, input, topN, returnDocuments, taskSettings) + ).getBytes(StandardCharsets.UTF_8) + ); + httpPost.setEntity(byteEntity); - builder.field(MixedbreadConstants.MODEL_FIELD, getModelId()); - builder.field(MixedbreadConstants.QUERY_FIELD, query); - builder.field(MixedbreadConstants.DOCUMENTS_FIELD, input); + httpPost.setHeader(HttpHeaders.CONTENT_TYPE, XContentType.JSON.mediaType()); + httpPost.setHeader(createAuthBearerHeader(model.apiKey())); - // prefer the root level return_documents over task settings - if (returnDocuments != null) { - builder.field(MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, returnDocuments); - } else if (taskSettings.getDoesReturnDocuments() != null) { - builder.field(MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, taskSettings.getDoesReturnDocuments()); - } + return new HttpRequest(httpPost, getInferenceEntityId()); + } - // prefer the root level top_n over task settings - if (topN != null) { - builder.field(MixedbreadRerankTaskSettings.TOP_N_DOCS_ONLY, topN); - } else if (taskSettings.getTopNDocumentsOnly() != null) { - builder.field(MixedbreadRerankTaskSettings.TOP_N_DOCS_ONLY, taskSettings.getTopNDocumentsOnly()); - } + @Override + public URI getURI() { + return model.uri(); + } + + @Override + public Request truncate() { + // no truncation + return this; + } - builder.endObject(); - return builder; + @Override + public boolean[] getTruncationInfo() { + // no truncation + return null; } - public Integer getTopN() { - return topN != null ? 
topN : taskSettings.getTopNDocumentsOnly(); + @Override + public String getInferenceEntityId() { + return model.getInferenceEntityId(); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java index f5fe26c469310..226ff56957979 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java @@ -15,6 +15,7 @@ import org.elasticsearch.xpack.inference.external.action.ExecutableAction; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.ServiceUtils; +import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants; import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadModel; import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadService; import org.elasticsearch.xpack.inference.services.mixedbread.action.MixedbreadActionVisitor; @@ -28,11 +29,10 @@ public class MixedbreadRerankModel extends MixedbreadModel { public static final String HOST = "api.mixedbread.com"; public static final String VERSION_1 = "v1"; - public static final String RERANK_PATH = "reranking"; private static final URIBuilder DEFAULT_URI_BUILDER = new URIBuilder().setScheme("https") .setHost(HOST) - .setPathSegments(VERSION_1, RERANK_PATH); + .setPathSegments(VERSION_1, MixedbreadConstants.RERANK_PATH); public static MixedbreadRerankModel of(MixedbreadRerankModel model, Map taskSettings) { var requestTaskSettings = MixedbreadRerankTaskSettings.fromMap(taskSettings); diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankRequestTaskSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankRequestTaskSettings.java deleted file mode 100644 index 4f589abc6e368..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankRequestTaskSettings.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.inference.services.mixedbread.rerank; - -import org.elasticsearch.common.ValidationException; -import org.elasticsearch.core.Nullable; -import org.elasticsearch.inference.ModelConfigurations; - -import java.util.Map; - -import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalBoolean; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveInteger; -import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.RETURN_DOCUMENTS_FIELD; -import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.TOP_K_FIELD; - -public record MixedbreadRerankRequestTaskSettings(@Nullable Boolean returnDocuments, @Nullable Integer topN) { - - public static final MixedbreadRerankRequestTaskSettings EMPTY_SETTINGS = new MixedbreadRerankRequestTaskSettings(null, null); - - /** - * Extracts the task settings from a map. All settings are considered optional and the absence of a setting - * does not throw an error. 
- * - * @param map the settings received from a request - * @return a {@link MixedbreadRerankRequestTaskSettings} - */ - public static MixedbreadRerankRequestTaskSettings fromMap(Map map) { - if (map.isEmpty()) { - return MixedbreadRerankRequestTaskSettings.EMPTY_SETTINGS; - } - - final var validationException = new ValidationException(); - - final var returnDocuments = extractOptionalBoolean(map, RETURN_DOCUMENTS_FIELD, validationException); - final var topN = extractOptionalPositiveInteger(map, TOP_K_FIELD, ModelConfigurations.TASK_SETTINGS, validationException); - - if (validationException.validationErrors().isEmpty() == false) { - throw validationException; - } - - return new MixedbreadRerankRequestTaskSettings(returnDocuments, topN); - } -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankResponseHandler.java index 90393addf24d4..a17f19238f291 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankResponseHandler.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankResponseHandler.java @@ -8,17 +8,15 @@ package org.elasticsearch.xpack.inference.services.mixedbread.rerank; import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser; -import org.elasticsearch.xpack.inference.services.mixedbread.response.MixedbreadErrorResponse; -import org.elasticsearch.xpack.inference.services.openai.OpenAiResponseHandler; -public class MixedbreadRerankResponseHandler extends OpenAiResponseHandler { +public class MixedbreadRerankResponseHandler extends MixedbreadResponseHandler { /** - * Constructs a new MixedbreadEmbeddingsResponseHandler with the specified request type and response parser. 
+ * Constructs a new MixedbreadRerankResponseHandler with the specified request type and response parser. * * @param requestType the type of request this handler will process * @param parseFunction the function to parse the response */ public MixedbreadRerankResponseHandler(String requestType, ResponseParser parseFunction) { - super(requestType, parseFunction, MixedbreadErrorResponse::fromResponse, false); + super(requestType, parseFunction); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java index 241f7db4aa3c6..b6760d5d08f9b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java @@ -18,47 +18,34 @@ import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadRateLimitServiceSettings; import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadService; +import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadUtils; import org.elasticsearch.xpack.inference.services.settings.FilteredXContentObject; import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; import java.io.IOException; -import java.net.URI; import java.util.Map; import java.util.Objects; import static org.elasticsearch.xpack.inference.services.ServiceFields.MODEL_ID; -import static org.elasticsearch.xpack.inference.services.ServiceFields.URL; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.convertToUri; -import static 
org.elasticsearch.xpack.inference.services.ServiceUtils.createOptionalUri; import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalInteger; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalString; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredString; public class MixedbreadRerankServiceSettings extends FilteredXContentObject implements ServiceSettings, MixedbreadRateLimitServiceSettings { - public static final String NAME = "mixedbread_ai_rerank_service_settings"; + public static final String NAME = "mixedbread_rerank_service_settings"; public static final String WINDOWS_SIZE = "windows_size"; /** - * Applied different rate limits based on the type of operation performed: - - * Operation Type Limit Burst Capacity Window - * Read 1,200 1,000 1-minute - * List 600 200 1-minute - * Write 360 120 1-minute - * Update 480 160 1-minute - * Delete 240 80 1-minute - * Rate Limiting. + * 100 req / min + * Rate Limiting. 
*/ - private static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(240); + public static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(100); private static final Integer DEFAULT_WINDOWS_SIZE = 8000; public static MixedbreadRerankServiceSettings fromMap(Map map, ConfigurationParseContext context) { ValidationException validationException = new ValidationException(); - String url = extractOptionalString(map, URL, ModelConfigurations.SERVICE_SETTINGS, validationException); - - URI uri = convertToUri(url, URL, ModelConfigurations.SERVICE_SETTINGS, validationException); - String model = extractOptionalString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException); + String model = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException); Integer windowsSize = extractOptionalInteger(map, WINDOWS_SIZE, ModelConfigurations.SERVICE_SETTINGS, validationException); RateLimitSettings rateLimitSettings = RateLimitSettings.of( map, @@ -72,27 +59,27 @@ public static MixedbreadRerankServiceSettings fromMap(Map map, C throw validationException; } - return new MixedbreadRerankServiceSettings(model, rateLimitSettings, uri, windowsSize); + return new MixedbreadRerankServiceSettings(model, rateLimitSettings, windowsSize); } private final String model; private final RateLimitSettings rateLimitSettings; - private final URI uri; private final Integer windowsSize; public MixedbreadRerankServiceSettings( - @Nullable String model, @Nullable RateLimitSettings rateLimitSettings, @Nullable URI uri, @Nullable Integer windowsSize) { + @Nullable String model, + @Nullable RateLimitSettings rateLimitSettings, + @Nullable Integer windowsSize + ) { this.model = model; this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS); - this.uri = uri; this.windowsSize = Objects.requireNonNullElse(windowsSize, DEFAULT_WINDOWS_SIZE); } public 
MixedbreadRerankServiceSettings(StreamInput in) throws IOException { this.model = in.readOptionalString(); this.rateLimitSettings = new RateLimitSettings(in); - this.uri = createOptionalUri(in.readOptionalString()); this.windowsSize = in.readOptionalInt(); } @@ -106,11 +93,6 @@ public RateLimitSettings rateLimitSettings() { return rateLimitSettings; } - @Override - public URI uri() { - return uri; - } - @Override public Integer windowSize() { return windowsSize; @@ -123,7 +105,13 @@ public String getWriteableName() { @Override public TransportVersion getMinimalSupportedVersion() { - return TransportVersion.minimumCompatible(); + assert false : "should never be called when supportsVersion is used"; + return MixedbreadUtils.ML_INFERENCE_MIXEDBREAD_ADDED; + } + + @Override + public boolean supportsVersion(TransportVersion version) { + return MixedbreadUtils.supportsMixedbread(version); } @Override @@ -134,10 +122,6 @@ protected XContentBuilder toXContentFragmentOfExposedFields(XContentBuilder buil rateLimitSettings.toXContent(builder, params); - if (uri != null) { - builder.field(URL, uri.toString()); - } - builder.field(WINDOWS_SIZE, windowsSize); return builder; @@ -158,8 +142,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(model); rateLimitSettings.writeTo(out); - var uriToWrite = uri != null ? 
uri.toString() : null; - out.writeOptionalString(uriToWrite); out.writeOptionalInt(windowsSize); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java index df68e733d08aa..c5e8e1afb8928 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java @@ -29,7 +29,7 @@ public class MixedbreadRerankTaskSettings implements TaskSettings { public static final String RETURN_DOCUMENTS = "return_documents"; public static final String TOP_N_DOCS_ONLY = "top_n"; - static final MixedbreadRerankTaskSettings EMPTY_SETTINGS = new MixedbreadRerankTaskSettings(null, null); + public static final MixedbreadRerankTaskSettings EMPTY_SETTINGS = new MixedbreadRerankTaskSettings(null, null); public static MixedbreadRerankTaskSettings fromMap(Map map) { ValidationException validationException = new ValidationException(); @@ -86,7 +86,7 @@ public static MixedbreadRerankTaskSettings of(Integer topNDocumentsOnly, Boolean private final Boolean returnDocuments; public MixedbreadRerankTaskSettings(StreamInput in) throws IOException { - this(in.readOptionalInt(), in.readOptionalBoolean()); + this(in.readOptionalVInt(), in.readOptionalBoolean()); } public MixedbreadRerankTaskSettings(@Nullable Integer topNDocumentsOnly, @Nullable Boolean doReturnDocuments) { @@ -124,7 +124,7 @@ public TransportVersion getMinimalSupportedVersion() { @Override public void writeTo(StreamOutput out) throws IOException { - out.writeOptionalInt(topNDocumentsOnly); + out.writeOptionalVInt(topNDocumentsOnly); out.writeOptionalBoolean(returnDocuments); } @@ -141,10 +141,6 @@ public 
int hashCode() { return Objects.hash(returnDocuments, topNDocumentsOnly); } - public Boolean getDoesReturnDocuments() { - return returnDocuments; - } - public Integer getTopNDocumentsOnly() { return topNDocumentsOnly; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadResponseHandler.java new file mode 100644 index 0000000000000..342a7b0c451e5 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadResponseHandler.java @@ -0,0 +1,70 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.mixedbread.rerank; + +import org.elasticsearch.xpack.inference.external.http.HttpResult; +import org.elasticsearch.xpack.inference.external.http.retry.BaseResponseHandler; +import org.elasticsearch.xpack.inference.external.http.retry.ErrorResponse; +import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser; +import org.elasticsearch.xpack.inference.external.http.retry.RetryException; +import org.elasticsearch.xpack.inference.external.request.Request; + +import static org.elasticsearch.core.Strings.format; + +public class MixedbreadResponseHandler extends BaseResponseHandler { + private static final String FORBIDDEN = "Valid credentials but insufficient permissions for this resource."; + private static final String PAYMENT_ERROR_MESSAGE = "Insufficient balance. Top up your account to continue."; + private static final String SERVICE_UNAVAILABLE = "Service temporarily down for maintenance or overloaded. 
Retry later."; + private static final String UNPROCESSABLE_ENTITY = "Request format is correct but cannot be processed."; + + public MixedbreadResponseHandler(String requestType, ResponseParser parseFunction) { + super(requestType, parseFunction, ErrorResponse::fromResponse); + } + + /** + * Validates the status code throws an RetryException if not in the range [200, 300). + * + * @param request The http request + * @param result The http response and body + * @throws RetryException Throws if status code is {@code >= 400 } + */ + @Override + protected void checkForFailureStatusCode(Request request, HttpResult result) throws RetryException { + if (result.isSuccessfulResponse()) { + return; + } + + // handle error codes + int statusCode = result.response().getStatusLine().getStatusCode(); + if (statusCode == 500) { + throw new RetryException(true, buildError(SERVER_ERROR, request, result)); + } else if (statusCode == 503) { + throw new RetryException(true, buildError(SERVICE_UNAVAILABLE, request, result)); + } else if (statusCode == 429) { + throw new RetryException(true, buildError(RATE_LIMIT, request, result)); + } else if (statusCode == 422) { + throw new RetryException(true, buildError(UNPROCESSABLE_ENTITY, request, result)); + } else if (statusCode == 404) { + throw new RetryException(false, buildError(resourceNotFoundError(request), request, result)); + } else if (statusCode == 403) { + throw new RetryException(false, buildError(FORBIDDEN, request, result)); + } else if (statusCode == 402) { + throw new RetryException(false, buildError(PAYMENT_ERROR_MESSAGE, request, result)); + } else if (statusCode == 401) { + throw new RetryException(false, buildError(AUTHENTICATION, request, result)); + } else if (statusCode == 400) { + throw new RetryException(false, buildError(BAD_REQUEST, request, result)); + } else { + throw new RetryException(false, buildError(UNSUCCESSFUL, request, result)); + } + } + + private static String resourceNotFoundError(Request request) { + 
return format("Resource not found at [%s]", request.getURI()); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadErrorResponse.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadErrorResponse.java deleted file mode 100644 index 3e4d4d14cf7d8..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadErrorResponse.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.inference.services.mixedbread.response; - -import org.elasticsearch.xpack.inference.external.http.HttpResult; -import org.elasticsearch.xpack.inference.external.http.retry.ErrorResponse; - -import java.nio.charset.StandardCharsets; - -public class MixedbreadErrorResponse extends ErrorResponse { - public MixedbreadErrorResponse(String message) { - super(message); - } - - public static ErrorResponse fromResponse(HttpResult response) { - try { - String errorMessage = new String(response.body(), StandardCharsets.UTF_8); - return new MixedbreadErrorResponse(errorMessage); - } catch (Exception e) { - // swallow the error - } - return ErrorResponse.UNDEFINED_ERROR; - } -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java index b259df8d93408..45256f0b95456 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java @@ -29,6 +29,8 @@ import static org.elasticsearch.xpack.inference.external.response.XContentUtils.positionParserAtTokenAfterField; public class MixedbreadRerankResponseEntity { + private static final String FAILED_TO_FIND_FIELD_TEMPLATE = "Failed to find required field [%s] in Mixedbread rerank response"; + /** * Parses the Mixedbread rerank response. @@ -101,7 +103,7 @@ private static List doParse(XContentParser parser) XContentParser.Token token = parser.currentToken(); ensureExpectedToken(XContentParser.Token.START_OBJECT, token, parser); - positionParserAtTokenAfterField(parser, "data", "FAILED_TO_FIND_FIELD_TEMPLATE"); // TODO error message + positionParserAtTokenAfterField(parser, "data", FAILED_TO_FIND_FIELD_TEMPLATE); token = parser.currentToken(); if (token == XContentParser.Token.START_ARRAY) { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java index 350fb8750ded7..63bee718039e5 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java @@ -7,38 +7,68 @@ package org.elasticsearch.xpack.inference.services.mixedbread; +import org.apache.http.HttpHeaders; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import 
org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentHelper; -import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.InferenceService; import org.elasticsearch.inference.InferenceServiceConfiguration; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.RerankingInferenceService; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.test.http.MockResponse; import org.elasticsearch.test.http.MockWebServer; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.core.inference.action.InferenceAction; import org.elasticsearch.xpack.inference.external.http.HttpClientManager; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSenderTests; import org.elasticsearch.xpack.inference.logging.ThrottlerManager; import org.elasticsearch.xpack.inference.services.InferenceServiceTestCase; +import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModel; +import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModelTests; +import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankServiceSettings; +import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankServiceSettingsTests; +import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettings; +import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettingsTests; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; import org.junit.After; import org.junit.Before; import java.io.IOException; -import 
java.util.concurrent.TimeUnit; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import static org.elasticsearch.common.xcontent.XContentHelper.toXContent; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertToXContentEquivalent; +import static org.elasticsearch.xpack.inference.Utils.getPersistedConfigMap; import static org.elasticsearch.xpack.inference.Utils.inferenceUtilityExecutors; import static org.elasticsearch.xpack.inference.Utils.mockClusterServiceEmpty; +import static org.elasticsearch.xpack.inference.external.http.Utils.entityAsMap; +import static org.elasticsearch.xpack.inference.external.http.Utils.getUrl; import static org.elasticsearch.xpack.inference.services.ServiceComponentsTests.createWithEmptySettings; +import static org.elasticsearch.xpack.inference.services.jinaai.JinaAIServiceSettingsTests.getServiceSettingsMap; +import static org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettingsTests.getSecretSettingsMap; import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.instanceOf; import static org.mockito.Mockito.mock; public class MixedbreadServiceTests extends InferenceServiceTestCase { - private static final TimeValue TIMEOUT = new TimeValue(30, TimeUnit.SECONDS); + private static final String INFERENCE_ENTITY_ID_VALUE = "id"; + public static final String DEFAULT_RERANK_URL = "https://api.mixedbread.com/v1/rerank"; + private static final String MODEL_NAME_VALUE = "modelName"; + private static final List INPUT = List.of("candidate1", "candidate2", "candidate3"); private final MockWebServer webServer = new MockWebServer(); private ThreadPool threadPool; private HttpClientManager clientManager; @@ -57,6 +87,422 @@ public void shutdown() throws IOException { webServer.close(); } + public void 
testParseRequestConfig_createsRerankModel() throws IOException { + try (var service = createMixedbreadService()) { + var modelName = randomAlphanumericOfLength(8); + var requestsPerMinute = randomNonNegativeInt(); + var topN = randomNonNegativeInt(); + var returnDocuments = randomBoolean(); + var apiKey = randomAlphanumericOfLength(8); + + var modelListener = new PlainActionFuture(); + + service.parseRequestConfig( + INFERENCE_ENTITY_ID_VALUE, + TaskType.RERANK, + getRequestConfigMap( + MixedbreadRerankServiceSettingsTests.getServiceSettingsMap(modelName, requestsPerMinute), + MixedbreadRerankTaskSettingsTests.getTaskSettingsMap(topN, returnDocuments), + getSecretSettingsMap(apiKey) + ), + modelListener + ); + + assertRerankModelSettings( + modelListener.actionGet(), + modelName, + new RateLimitSettings(requestsPerMinute), + apiKey, + new MixedbreadRerankTaskSettings(topN, returnDocuments) + ); + } + } + + public void testParseRequestConfig_onlyRequiredSettings_createsRerankModel() throws IOException { + try (var service = createMixedbreadService()) { + var modelName = randomAlphanumericOfLength(8); + var apiKey = randomAlphanumericOfLength(8); + + var modelListener = new PlainActionFuture(); + + service.parseRequestConfig( + INFERENCE_ENTITY_ID_VALUE, + TaskType.RERANK, + getRequestConfigMap(MixedbreadRerankServiceSettingsTests.getServiceSettingsMap(modelName), getSecretSettingsMap(apiKey)), + modelListener + ); + + assertRerankModelSettings( + modelListener.actionGet(), + modelName, + MixedbreadRerankServiceSettings.DEFAULT_RATE_LIMIT_SETTINGS, + apiKey, + MixedbreadRerankTaskSettings.EMPTY_SETTINGS + ); + + } + } + + public void testParsePersistedConfigWithSecrets_createsRerankModel() throws IOException { + try (var service = createMixedbreadService()) { + var modelName = randomAlphanumericOfLength(8); + var requestsPerMinute = randomNonNegativeInt(); + var topN = randomNonNegativeInt(); + var returnDocuments = randomBoolean(); + var apiKey = 
randomAlphanumericOfLength(8); + + var persistedConfig = getPersistedConfigMap( + MixedbreadRerankServiceSettingsTests.getServiceSettingsMap(modelName, requestsPerMinute), + MixedbreadRerankTaskSettingsTests.getTaskSettingsMap(topN, returnDocuments), + getSecretSettingsMap(apiKey) + ); + + var model = service.parsePersistedConfigWithSecrets( + INFERENCE_ENTITY_ID_VALUE, + TaskType.RERANK, + persistedConfig.config(), + persistedConfig.secrets() + ); + + assertRerankModelSettings( + model, + modelName, + new RateLimitSettings(requestsPerMinute), + apiKey, + new MixedbreadRerankTaskSettings(topN, returnDocuments) + ); + } + } + + public void testParsePersistedConfigWithSecrets_onlyRequiredSettings_createsRerankModel() throws IOException { + try (var service = createMixedbreadService()) { + var modelName = randomAlphanumericOfLength(8); + var apiKey = randomAlphanumericOfLength(8); + + var persistedConfig = getPersistedConfigMap(getServiceSettingsMap(modelName, null), Map.of(), getSecretSettingsMap(apiKey)); + + var model = service.parsePersistedConfigWithSecrets( + INFERENCE_ENTITY_ID_VALUE, + TaskType.RERANK, + persistedConfig.config(), + persistedConfig.secrets() + ); + + assertRerankModelSettings( + model, + modelName, + MixedbreadRerankServiceSettings.DEFAULT_RATE_LIMIT_SETTINGS, + apiKey, + MixedbreadRerankTaskSettings.EMPTY_SETTINGS + ); + } + } + + public void testParsePersistedConfig_createsRerankModel() throws IOException { + try (var service = createMixedbreadService()) { + var modelName = randomAlphanumericOfLength(8); + var requestsPerMinute = randomNonNegativeInt(); + var topN = randomNonNegativeInt(); + var returnDocuments = randomBoolean(); + + var persistedConfig = getPersistedConfigMap( + MixedbreadRerankServiceSettingsTests.getServiceSettingsMap(modelName, requestsPerMinute), + MixedbreadRerankTaskSettingsTests.getTaskSettingsMap(topN, returnDocuments), + null + ); + + var model = service.parsePersistedConfig(INFERENCE_ENTITY_ID_VALUE, 
TaskType.RERANK, persistedConfig.config()); + + assertRerankModelSettings( + model, + modelName, + new RateLimitSettings(requestsPerMinute), + "", + new MixedbreadRerankTaskSettings(topN, returnDocuments) + ); + } + } + + public void testInfer_Rerank_UnauthorisedResponse() throws IOException { + var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); + + try (var service = new MixedbreadService(senderFactory, createWithEmptySettings(threadPool), mockClusterServiceEmpty())) { + + String responseJson = """ + { + "detail": "Unauthorized" + } + """; + webServer.enqueue(new MockResponse().setResponseCode(401).setBody(responseJson)); + + var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, "secret", "uri", 1024, false); + model.setURI(getUrl(webServer)); + + PlainActionFuture listener = new PlainActionFuture<>(); + service.infer( + model, + "query", + null, + null, + List.of("candidate1", "candidate2"), + false, + new HashMap<>(), + null, + InferenceAction.Request.DEFAULT_TIMEOUT, + listener + ); + + var error = expectThrows(ElasticsearchException.class, () -> listener.actionGet(TEST_REQUEST_TIMEOUT)); + assertThat(error.getMessage(), containsString("Received an authentication error status code for request")); + assertThat(error.getMessage(), containsString("Unauthorized")); + assertThat(webServer.requests(), hasSize(1)); + } + } + + public void testInfer_Rerank_Get_Response_NoReturnDocuments_NoTopN() throws IOException { + String responseJson = """ + { + "usage": { + "prompt_tokens": 162, + "total_tokens": 162, + "completion_tokens": 0 + }, + "model": "modelName", + "data": [ + { + "index": 0, + "score": 0.98291015625, + "object": "rank_result" + }, + { + "index": 2, + "score": 0.61962890625, + "object": "rank_result" + }, + { + "index": 3, + "score": 0.3642578125, + "object": "rank_result" + } + ], + "object": "list", + "return_input": false + } + """; + var senderFactory = 
HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); + + try (var service = new MixedbreadService(senderFactory, createWithEmptySettings(threadPool), mockClusterServiceEmpty())) { + webServer.enqueue(new MockResponse().setResponseCode(200).setBody(responseJson)); + var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, "secret", getUrl(webServer), null, false); + PlainActionFuture listener = new PlainActionFuture<>(); + service.infer( + model, + "query", + null, + null, + INPUT, + false, + new HashMap<>(), + null, + InferenceAction.Request.DEFAULT_TIMEOUT, + listener + ); + + var result = listener.actionGet(TEST_REQUEST_TIMEOUT); + var resultAsMap = result.asMap(); + assertThat( + resultAsMap, + is( + Map.of( + "rerank", + List.of( + Map.of("ranked_doc", Map.of("index", 0, "relevance_score", 0.98291016F)), + Map.of("ranked_doc", Map.of("index", 2, "relevance_score", 0.6196289F)), + Map.of("ranked_doc", Map.of("index", 3, "relevance_score", 0.3642578F)) + ) + ) + ) + ); + + assertThat(webServer.requests(), hasSize(1)); + assertThat(webServer.requests().getFirst().getHeader(HttpHeaders.CONTENT_TYPE), equalTo(XContentType.JSON.mediaType())); + assertThat(webServer.requests().getFirst().getHeader(HttpHeaders.AUTHORIZATION), equalTo("Bearer secret")); + + var requestMap = entityAsMap(webServer.requests().getFirst().getBody()); + assertThat(requestMap, is(Map.of("query", "query", "input", INPUT, "model", MODEL_NAME_VALUE, "return_input", false))); + } + } + + public void testInfer_Rerank_Get_Response_ReturnDocumentsNull_NoTopN() throws IOException { + String responseJson = """ + { + "usage": { + "prompt_tokens": 162, + "total_tokens": 162, + "completion_tokens": 0 + }, + "model": "modelName", + "data": [ + { + "index": 0, + "score": 0.98291015625, + "input": "candidate3", + "object": "rank_result" + }, + { + "index": 2, + "score": 0.61962890625, + "input": "candidate2", + "object": "rank_result" + }, + { + "index": 3, + "score": 
0.3642578125, + "input": "candidate1", + "object": "rank_result" + } + ], + "object": "list" + } + """; + var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); + + try (var service = new MixedbreadService(senderFactory, createWithEmptySettings(threadPool), mockClusterServiceEmpty())) { + webServer.enqueue(new MockResponse().setResponseCode(200).setBody(responseJson)); + var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, "secret", getUrl(webServer), null, null); + PlainActionFuture listener = new PlainActionFuture<>(); + service.infer( + model, + "query", + null, + null, + INPUT, + false, + new HashMap<>(), + null, + InferenceAction.Request.DEFAULT_TIMEOUT, + listener + ); + + var result = listener.actionGet(TEST_REQUEST_TIMEOUT); + var resultAsMap = result.asMap(); + assertThat( + resultAsMap, + is( + Map.of( + "rerank", + List.of( + Map.of("ranked_doc", Map.of("index", 0, "relevance_score", 0.98291015625F, "text", "candidate3")), + Map.of("ranked_doc", Map.of("index", 2, "relevance_score", 0.61962890625F, "text", "candidate2")), + Map.of("ranked_doc", Map.of("index", 3, "relevance_score", 0.3642578125F, "text", "candidate1")) + ) + ) + ) + ); + assertThat(webServer.requests(), hasSize(1)); + assertThat(webServer.requests().getFirst().getHeader(HttpHeaders.CONTENT_TYPE), equalTo(XContentType.JSON.mediaType())); + assertThat(webServer.requests().getFirst().getHeader(HttpHeaders.AUTHORIZATION), equalTo("Bearer secret")); + + var requestMap = entityAsMap(webServer.requests().getFirst().getBody()); + assertThat(requestMap, is(Map.of("query", "query", "input", INPUT, "model", MODEL_NAME_VALUE))); + + } + } + + public void testInfer_Rerank_Get_Response_ReturnDocuments_TopN() throws IOException { + String responseJson = """ + { + "usage": { + "prompt_tokens": 162, + "total_tokens": 162, + "completion_tokens": 0 + }, + "model": "modelName", + "data": [ + { + "index": 0, + "score": 0.98291015625, + "input": 
"candidate3", + "object": "rank_result" + }, + { + "index": 2, + "score": 0.61962890625, + "input": "candidate2", + "object": "rank_result" + }, + { + "index": 3, + "score": 0.3642578125, + "input": "candidate1", + "object": "rank_result" + } + ], + "object": "list", + "top_k": 3, + "return_input": true + } + """; + var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); + + try (var service = new MixedbreadService(senderFactory, createWithEmptySettings(threadPool), mockClusterServiceEmpty())) { + webServer.enqueue(new MockResponse().setResponseCode(200).setBody(responseJson)); + var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, "secret", getUrl(webServer), 3, true); + PlainActionFuture listener = new PlainActionFuture<>(); + service.infer( + model, + "query", + null, + null, + List.of("candidate1", "candidate2", "candidate3", "candidate4"), + false, + new HashMap<>(), + null, + InferenceAction.Request.DEFAULT_TIMEOUT, + listener + ); + + var result = listener.actionGet(TEST_REQUEST_TIMEOUT); + var resultAsMap = result.asMap(); + assertThat( + resultAsMap, + is( + Map.of( + "rerank", + List.of( + Map.of("ranked_doc", Map.of("text", "candidate3", "index", 0, "relevance_score", 0.98291015625F)), + Map.of("ranked_doc", Map.of("text", "candidate2", "index", 2, "relevance_score", 0.61962890625F)), + Map.of("ranked_doc", Map.of("text", "candidate1", "index", 3, "relevance_score", 0.3642578125F)) + ) + ) + ) + ); + assertThat(webServer.requests(), hasSize(1)); + assertThat(webServer.requests().getFirst().getHeader(HttpHeaders.CONTENT_TYPE), equalTo(XContentType.JSON.mediaType())); + assertThat(webServer.requests().getFirst().getHeader(HttpHeaders.AUTHORIZATION), equalTo("Bearer secret")); + + var requestMap = entityAsMap(webServer.requests().getFirst().getBody()); + assertThat( + requestMap, + is( + Map.of( + "query", + "query", + "input", + List.of("candidate1", "candidate2", "candidate3", "candidate4"), + "model", + 
MODEL_NAME_VALUE, + "return_input", + true, + "top_k", + 3 + ) + ) + ); + + } + } + public void testGetConfiguration() throws Exception { try (var service = createMixedbreadService()) { String content = XContentHelper.stripWhitespace(""" @@ -75,7 +521,7 @@ public void testGetConfiguration() throws Exception { "supported_task_types": ["rerank"] }, "model_id": { - "description": "The name of the model to use for the inference task.", + "description": "The model ID to use for Mixedbread requests.", "label": "Model ID", "required": true, "sensitive": false, @@ -110,6 +556,57 @@ public void testGetConfiguration() throws Exception { } } + private static void assertRerankModelSettings( + Model model, + String modelName, + RateLimitSettings rateLimitSettings, + String apiKey, + MixedbreadRerankTaskSettings taskSettings + ) { + assertThat(model, instanceOf(MixedbreadRerankModel.class)); + + var rerankModel = (MixedbreadRerankModel) model; + assertCommonModelSettings(rerankModel, DEFAULT_RERANK_URL, modelName, rateLimitSettings, apiKey); + + assertThat(rerankModel.getTaskSettings(), is(taskSettings)); + } + + private static void assertCommonModelSettings( + T model, + String url, + String modelName, + RateLimitSettings rateLimitSettings, + String apiKey + ) { + assertThat(model.uri().toString(), is(url)); + assertThat(model.getServiceSettings().modelId(), is(modelName)); + assertThat(model.rateLimitServiceSettings().rateLimitSettings(), is(rateLimitSettings)); + + assertThat(model.apiKey().toString(), is(apiKey)); + } + + private Map getRequestConfigMap( + Map serviceSettings, + Map taskSettings, + Map secretSettings + ) { + var builtServiceSettings = new HashMap<>(); + builtServiceSettings.putAll(serviceSettings); + builtServiceSettings.putAll(secretSettings); + + return new HashMap<>( + Map.of(ModelConfigurations.SERVICE_SETTINGS, builtServiceSettings, ModelConfigurations.TASK_SETTINGS, taskSettings) + ); + } + + private Map getRequestConfigMap(Map serviceSettings, Map 
secretSettings) { + var builtServiceSettings = new HashMap<>(); + builtServiceSettings.putAll(serviceSettings); + builtServiceSettings.putAll(secretSettings); + + return new HashMap<>(Map.of(ModelConfigurations.SERVICE_SETTINGS, builtServiceSettings)); + } + private MixedbreadService createMixedbreadService() { return new MixedbreadService(mock(HttpRequestSender.Factory.class), createWithEmptySettings(threadPool), mockClusterServiceEmpty()); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreatorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreatorTests.java index e059050cac026..98cec0c94ab5b 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreatorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreatorTests.java @@ -44,6 +44,7 @@ import static org.mockito.Mockito.mock; public class MixedbreadActionCreatorTests extends ESTestCase { + private static final String EXPECTED_EXCEPTION = "Failed to send Mixedbread rerank request. Cause: failed"; private static final TimeValue TIMEOUT = new TimeValue(30, TimeUnit.SECONDS); private static final QueryAndDocsInputs QUERY_AND_DOCS_INPUTS = new QueryAndDocsInputs( "popular name", @@ -103,10 +104,7 @@ public void testExecute_ThrowsElasticsearchException_WhenSenderOnFailureIsCalled var action = createAction("model", "secret", getUrl(webServer), null, null, sender); ElasticsearchException thrownException = executeActionWithException(action); - MatcherAssert.assertThat( - thrownException.getMessage(), - is("Failed to send Mixedbread rerank request from inference entity id [model]. 
Cause: failed") - ); + MatcherAssert.assertThat(thrownException.getMessage(), is(EXPECTED_EXCEPTION)); } public void testExecute_ThrowsElasticsearchException_WhenSenderOnFailureIsCalled_WhenUrlIsNull() { @@ -122,10 +120,7 @@ public void testExecute_ThrowsElasticsearchException_WhenSenderOnFailureIsCalled var action = createAction("model", "secret", null, null, null, sender); ElasticsearchException thrownException = executeActionWithException(action); - MatcherAssert.assertThat( - thrownException.getMessage(), - is("Failed to send Mixedbread rerank request from inference entity id [model]. Cause: failed") - ); + MatcherAssert.assertThat(thrownException.getMessage(), is(EXPECTED_EXCEPTION)); } public void testExecute_ThrowsException() { @@ -135,10 +130,7 @@ public void testExecute_ThrowsException() { var action = createAction("model", "secret", getUrl(webServer), null, null, sender); ElasticsearchException thrownException = executeActionWithException(action); - MatcherAssert.assertThat( - thrownException.getMessage(), - is("Failed to send Mixedbread rerank request from inference entity id [model]. Cause: failed") - ); + MatcherAssert.assertThat(thrownException.getMessage(), is(EXPECTED_EXCEPTION)); } public void testExecute_ThrowsExceptionWithNullUrl() { @@ -148,10 +140,7 @@ public void testExecute_ThrowsExceptionWithNullUrl() { var action = createAction("model", "secret", null, null, null, sender); var thrownException = executeActionWithException(action); - MatcherAssert.assertThat( - thrownException.getMessage(), - is("Failed to send Mixedbread rerank request from inference entity id [model]. 
Cause: failed") - ); + MatcherAssert.assertThat(thrownException.getMessage(), is(EXPECTED_EXCEPTION)); } private static ElasticsearchException executeActionWithException(ExecutableAction action) { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntityTests.java index e7c8b0b04574f..55e1d77af4a2c 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntityTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntityTests.java @@ -42,7 +42,7 @@ public void testXContent_SingleRequest_WritesAllFieldsIfDefined() throws IOExcep "abc" ], "top_k": 12, - "return_documents": true + "return_input": true } """)); } @@ -87,7 +87,7 @@ public void testXContent_MultipleRequests_WritesAllFieldsIfDefined() throws IOEx "def" ], "top_k": 12, - "return_documents": false + "return_input": false } """)); } @@ -132,7 +132,7 @@ public void testXContent_SingleRequest_UsesTaskSettingsTopNIfRootIsNotDefined() "abc" ], "top_k": 8, - "return_documents": false + "return_input": false } """)); } @@ -155,7 +155,7 @@ public void testXContent_SingleRequest_UsesTaskSettingsReturnDocumentsIfRootIsNo "abc" ], "top_k": 8, - "return_documents": true + "return_input": true } """)); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java index cf79e6bc64e85..20d6356ffd335 100644 --- 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java @@ -30,26 +30,25 @@ public class MixedbreadRerankRequestTests extends ESTestCase { public static final String INPUT = "input"; public static final String MODEL = "model"; public static final String QUERY = "query"; - public static final int TOP_N = 1; + public static final int TOP_K = 1; public void testCreateRequest_WithMinimalFieldsSet() throws IOException { var request = createRequest(QUERY, INPUT, MODEL, null, null); var requestMap = getEntityAsMap(request); assertThat(requestMap, aMapWithSize(3)); - assertThat(requestMap.get("documents"), is(List.of(INPUT))); + assertThat(requestMap.get("input"), is(List.of(INPUT))); assertThat(requestMap.get("query"), is(QUERY)); assertThat(requestMap.get("model"), is(MODEL)); } public void testCreateRequest_WithAllFieldsSet() throws IOException { - var request = createRequest(QUERY, INPUT, MODEL, TOP_N, Boolean.FALSE); + var request = createRequest(QUERY, INPUT, MODEL, TOP_K, Boolean.FALSE); Map requestMap = getEntityAsMap(request); - assertThat(requestMap, aMapWithSize(5)); - assertThat(requestMap.get("documents"), is(List.of(INPUT))); + assertThat(requestMap.get("input"), is(List.of(INPUT))); assertThat(requestMap.get("query"), is(QUERY)); - assertThat(requestMap.get("top_n"), is(TOP_N)); - assertThat(requestMap.get("return_documents"), is(Boolean.FALSE)); + assertThat(requestMap.get("top_k"), is(TOP_K)); + assertThat(requestMap.get("return_input"), is(Boolean.FALSE)); assertThat(requestMap.get("model"), is(MODEL)); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java index aaca3677346cd..3d256b4cffade 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java @@ -14,14 +14,14 @@ import java.util.Map; -import static org.elasticsearch.xpack.inference.services.jinaai.rerank.JinaAIRerankTaskSettingsTests.getTaskSettingsMap; +import static org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettingsTests.getTaskSettingsMap; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.sameInstance; public class MixedbreadRerankModelTests extends ESTestCase { - public static final String DEFAULT_URL = "https://api.mixedbread.com/v1/reranking"; - public static final String CUSTOM_URL = "https://custom.url.com/v1/reranking"; + public static final String DEFAULT_URL = "https://api.mixedbread.com/v1/rerank"; + public static final String CUSTOM_URL = "https://custom.url.com/v1/rerank"; public static final String MODEL_ID = "model_id"; public static final String API_KEY = "secret"; @@ -81,7 +81,7 @@ public static MixedbreadRerankModel createModel( ) { return new MixedbreadRerankModel( model, - new MixedbreadRerankServiceSettings(model, null, null, null), + new MixedbreadRerankServiceSettings(model, null, null), new MixedbreadRerankTaskSettings(topN, returnDocuments), new DefaultSecretSettings(new SecureString(apiKey.toCharArray())), uri diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java index 
6538bb2025d21..389bf52771e98 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java @@ -19,48 +19,45 @@ import org.elasticsearch.xpack.inference.services.settings.RateLimitSettingsTests; import java.io.IOException; -import java.net.URI; import java.util.HashMap; import java.util.Map; import static org.elasticsearch.xpack.inference.MatchersUtils.equalToIgnoringWhitespaceInJsonString; +import static org.elasticsearch.xpack.inference.services.settings.RateLimitSettings.REQUESTS_PER_MINUTE_FIELD; public class MixedbreadRerankServiceSettingsTests extends AbstractWireSerializingTestCase { private static final String MODEL = "model"; private static final RateLimitSettings RATE_LIMIT = new RateLimitSettings(2); private static final Integer WINDOWS_SIZE = 512; - private static final URI URI = java.net.URI.create("uri"); public static MixedbreadRerankServiceSettings createRandom() { return createRandom(randomFrom(new RateLimitSettings[] { null, RateLimitSettingsTests.createRandom() })); } public static MixedbreadRerankServiceSettings createRandom(@Nullable RateLimitSettings rateLimitSettings) { - return new MixedbreadRerankServiceSettings(randomAlphaOfLengthOrNull(10), rateLimitSettings, null, null); + return new MixedbreadRerankServiceSettings(randomAlphaOfLengthOrNull(10), rateLimitSettings, null); } public void testToXContent_WritesAllValues() throws IOException { - var serviceSettings = new MixedbreadRerankServiceSettings(MODEL, RATE_LIMIT, URI, WINDOWS_SIZE); + var serviceSettings = new MixedbreadRerankServiceSettings(MODEL, RATE_LIMIT, WINDOWS_SIZE); assertThat(getXContentResult(serviceSettings), equalToIgnoringWhitespaceInJsonString(""" { "model_id":"model", "rate_limit": { "requests_per_minute": 2 }, - "url": "uri", 
"windows_size": 512 } """)); } - public void testToXContent_DoesNotWriteOptionalValues_DefaultRateLimit_And_DefaultWindowsSize() throws IOException { - var serviceSettings = new MixedbreadRerankServiceSettings(MODEL, null, null, null); + var serviceSettings = new MixedbreadRerankServiceSettings(MODEL, null, null); assertThat(getXContentResult(serviceSettings), equalToIgnoringWhitespaceInJsonString(""" { "model_id":"model", "rate_limit": { - "requests_per_minute": 240 + "requests_per_minute": 100 }, "windows_size": 8000 } @@ -93,18 +90,20 @@ protected MixedbreadRerankServiceSettings mutateInstance(MixedbreadRerankService default -> throw new AssertionError("Illegal randomisation branch"); } - return new MixedbreadRerankServiceSettings(modelId, rateLimitSettings, null, null); + return new MixedbreadRerankServiceSettings(modelId, rateLimitSettings, null); + } + + public static Map getServiceSettingsMap(String model) { + return getServiceSettingsMap(model, null); } - public static Map getServiceSettingsMap(@Nullable String url, @Nullable String model) { + public static Map getServiceSettingsMap(String model, @Nullable Integer requestsPerMinute) { var map = new HashMap(); - if (url != null) { - map.put(ServiceFields.URL, url); - } + map.put(ServiceFields.MODEL_ID, model); - if (model != null) { - map.put(ServiceFields.MODEL_ID, model); + if (requestsPerMinute != null) { + map.put(RateLimitSettings.FIELD_NAME, new HashMap<>(Map.of(REQUESTS_PER_MINUTE_FIELD, requestsPerMinute))); } return map; diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettingsTests.java index 3ffeb79ec5f9b..6449c42281c1e 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettingsTests.java +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettingsTests.java @@ -67,7 +67,7 @@ public void testFromMap_WithInvalidTopNDocsOnly_ThrowsValidationException() { assertThat(thrownException.getMessage(), containsString("field [top_n] is not of the expected type")); } - public void UpdatedTaskSettings_WithEmptyMap_ReturnsSameSettings() { + public void testUpdatedTaskSettings_WithEmptyMap_ReturnsSameSettings() { var initialSettings = new MixedbreadRerankTaskSettings(5, true); MixedbreadRerankTaskSettings updatedSettings = (MixedbreadRerankTaskSettings) initialSettings.updatedTaskSettings(Map.of()); assertEquals(initialSettings, updatedSettings); @@ -127,11 +127,11 @@ public static Map getTaskSettingsMap(@Nullable Integer topNDocum var map = new HashMap(); if (topNDocumentsOnly != null) { - map.put(MixedbreadRerankTaskSettings.TOP_N_DOCS_ONLY, topNDocumentsOnly.toString()); + map.put(MixedbreadRerankTaskSettings.TOP_N_DOCS_ONLY, topNDocumentsOnly); } if (returnDocuments != null) { - map.put(MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, returnDocuments.toString()); + map.put(MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, returnDocuments); } return map; From 5bbffe1cedc0d9131a066d49237538eeec37dd47 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 27 Jan 2026 22:52:08 +0000 Subject: [PATCH 08/48] [CI] Update transport version definitions --- .../definitions/referable/ml_inference_mixedbread_added.csv | 1 + server/src/main/resources/transport/upper_bounds/9.4.csv | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv diff --git a/server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv b/server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv new file mode 100644 index 0000000000000..661db7649ee83 --- 
/dev/null +++ b/server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv @@ -0,0 +1 @@ +9267000 diff --git a/server/src/main/resources/transport/upper_bounds/9.4.csv b/server/src/main/resources/transport/upper_bounds/9.4.csv index 9addac0103757..c9177fae97242 100644 --- a/server/src/main/resources/transport/upper_bounds/9.4.csv +++ b/server/src/main/resources/transport/upper_bounds/9.4.csv @@ -1 +1 @@ -inference_api_eis_max_batch_size,9266000 +ml_inference_mixedbread_added,9267000 From 83a649767a60ff4ecaae8ae0ab6326dcc7b00ef3 Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Wed, 28 Jan 2026 01:40:21 +0100 Subject: [PATCH 09/48] Switch to new approach for transport version --- .../inference/services/mixedbread/MixedbreadService.java | 3 +-- .../mixedbread/rerank/MixedbreadRerankTaskSettings.java | 9 ++++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java index 3a6f41be92b12..8108fb8b6250f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java @@ -59,7 +59,6 @@ public class MixedbreadService extends SenderService implements RerankingInferen public static final String NAME = "mixedbread"; public static final String SERVICE_NAME = "Mixedbread"; - // private static final TransportVersion MIXEDBREAD_SERVICE = TransportVersion.fromName("mixedbread_service"); private static final EnumSet SUPPORTED_TASK_TYPES = EnumSet.of(TaskType.RERANK); private static final Map RERANKERS_INPUT_SIZE = Map.of( @@ -260,7 +259,7 @@ protected void validateInputType(InputType inputType, Model model, ValidationExc 
@Override public TransportVersion getMinimalSupportedVersion() { - return TransportVersion.minimumCompatible(); + return MixedbreadUtils.ML_INFERENCE_MIXEDBREAD_ADDED; } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java index c5e8e1afb8928..852169b06a9b3 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java @@ -15,6 +15,7 @@ import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.TaskSettings; import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadUtils; import java.io.IOException; import java.util.HashMap; @@ -119,7 +120,13 @@ public String getWriteableName() { @Override public TransportVersion getMinimalSupportedVersion() { - return TransportVersion.minimumCompatible(); + assert false : "should never be called when supportsVersion is used"; + return MixedbreadUtils.ML_INFERENCE_MIXEDBREAD_ADDED; + } + + @Override + public boolean supportsVersion(TransportVersion version) { + return MixedbreadUtils.supportsMixedbread(version); } @Override From fbd6d5d37923a52365d7f0df3d676f6978d74911 Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Wed, 28 Jan 2026 03:17:01 +0100 Subject: [PATCH 10/48] Use ConstructingObjectParser --- .../MixedbreadRerankResponseEntity.java | 107 +++++++++++------- 1 file changed, 65 insertions(+), 42 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java index 45256f0b95456..55a09971c2271 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java @@ -7,12 +7,13 @@ package org.elasticsearch.xpack.inference.services.mixedbread.response; -import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; import org.elasticsearch.core.Nullable; import org.elasticsearch.inference.InferenceServiceResults; import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ObjectParser; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentParseException; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xcontent.XContentType; @@ -22,14 +23,10 @@ import java.io.IOException; import java.util.List; -import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; -import static org.elasticsearch.common.xcontent.XContentParserUtils.parseList; -import static org.elasticsearch.common.xcontent.XContentParserUtils.throwUnknownToken; -import static org.elasticsearch.xpack.inference.external.response.XContentUtils.moveToFirstToken; -import static org.elasticsearch.xpack.inference.external.response.XContentUtils.positionParserAtTokenAfterField; +import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; +import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; public class MixedbreadRerankResponseEntity { - private static final String FAILED_TO_FIND_FIELD_TEMPLATE = "Failed to find 
required field [%s] in Mixedbread rerank response"; /** * Parses the Mixedbread rerank response. @@ -90,55 +87,81 @@ public class MixedbreadRerankResponseEntity { * @throws IOException if there is an error parsing the response */ public static InferenceServiceResults fromResponse(HttpResult response) throws IOException { - var parserConfig = XContentParserConfiguration.EMPTY.withDeprecationHandler(LoggingDeprecationHandler.INSTANCE); - - try (XContentParser jsonParser = XContentFactory.xContent(XContentType.JSON).createParser(parserConfig, response.body())) { - moveToFirstToken(jsonParser); - moveToFirstToken(jsonParser); - return new RankedDocsResults(doParse(jsonParser)); + try (var p = XContentFactory.xContent(XContentType.JSON).createParser(XContentParserConfiguration.EMPTY, response.body())) { + return Response.PARSER.apply(p, null).toRankedDocsResults(); } } - private static List doParse(XContentParser parser) throws IOException { - XContentParser.Token token = parser.currentToken(); - ensureExpectedToken(XContentParser.Token.START_OBJECT, token, parser); - - positionParserAtTokenAfterField(parser, "data", FAILED_TO_FIND_FIELD_TEMPLATE); + private record Response(List results) { + @SuppressWarnings("unchecked") + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + Response.class.getSimpleName(), + true, + args -> new Response((List) args[0]) + ); - token = parser.currentToken(); - if (token == XContentParser.Token.START_ARRAY) { - return parseList(parser, (listParser, index) -> { - var parsedRankedDoc = RankedDocEntry.parse(parser); - return new RankedDocsResults.RankedDoc(parsedRankedDoc.index, parsedRankedDoc.score, parsedRankedDoc.text); - }); - } else { - throwUnknownToken(token, parser); + static { + PARSER.declareObjectArray(constructorArg(), ResultItem.PARSER::apply, new ParseField("data")); } - // This should never be reached. 
The above code should either return successfully or hit the throwUnknownToken - // or throw a parsing exception - throw new IllegalStateException("Reached an invalid state while parsing the Mixedbread response"); + public RankedDocsResults toRankedDocsResults() { + List rankedDocs = results.stream() + .map( + item -> new RankedDocsResults.RankedDoc( + item.index(), + item.relevanceScore(), + item.document() != null ? item.document().text() : null + ) + ) + .toList(); + return new RankedDocsResults(rankedDocs); + } } - private record RankedDocEntry(Integer index, Float score, @Nullable String text) { - - private static final ParseField TEXT = new ParseField("input"); - private static final ParseField SCORE = new ParseField("score"); - private static final ParseField INDEX = new ParseField("index"); - private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( - "mixedbread_rerank_response", + private record ResultItem(int index, float relevanceScore, @Nullable Document document) { + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + ResultItem.class.getSimpleName(), true, - args -> new RankedDocEntry((int) args[0], (float) args[1], (String) args[2]) + args -> new ResultItem((Integer) args[0], (Float) args[1], (Document) args[2]) ); static { - PARSER.declareInt(ConstructingObjectParser.constructorArg(), INDEX); - PARSER.declareFloat(ConstructingObjectParser.constructorArg(), SCORE); - PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), TEXT); + PARSER.declareInt(constructorArg(), new ParseField("index")); + PARSER.declareFloat(constructorArg(), new ParseField("score")); + PARSER.declareField( + optionalConstructorArg(), + (p, c) -> parseDocument(p), + new ParseField("input"), + ObjectParser.ValueType.VALUE + ); } + } + + private record Document(String text) {} - public static RankedDocEntry parse(XContentParser parser) { - return PARSER.apply(parser, null); + private static Document 
parseDocument(XContentParser parser) throws IOException { + var token = parser.currentToken(); + if (token == XContentParser.Token.START_OBJECT) { + return new Document(DocumentObject.PARSER.apply(parser, null).text()); + } else if (token == XContentParser.Token.VALUE_STRING) { + return new Document(parser.text()); + } else if (token == XContentParser.Token.VALUE_NULL) { + return new Document(null); + } + + throw new XContentParseException(parser.getTokenLocation(), + "Expected an object, string or null for document field, but got: " + token); + } + + private record DocumentObject(String text) { + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + DocumentObject.class.getSimpleName(), + true, + args -> new DocumentObject((String) args[0]) + ); + + static { + PARSER.declareString(constructorArg(), new ParseField("text")); } } } From f4c5c0d45aab75170eee0d0a87847e022d20c149 Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Wed, 28 Jan 2026 05:36:55 +0100 Subject: [PATCH 11/48] Address comments --- docs/changelog/140477.yaml | 5 +++++ .../services/mixedbread/MixedbreadModel.java | 12 ++++-------- .../MixedbreadRateLimitServiceSettings.java | 2 -- .../mixedbread/rerank/MixedbreadRerankModel.java | 2 +- .../rerank/MixedbreadRerankServiceSettings.java | 6 ------ .../services/mixedbread/MixedbreadServiceTests.java | 2 +- 6 files changed, 11 insertions(+), 18 deletions(-) create mode 100644 docs/changelog/140477.yaml diff --git a/docs/changelog/140477.yaml b/docs/changelog/140477.yaml new file mode 100644 index 0000000000000..646b31384722b --- /dev/null +++ b/docs/changelog/140477.yaml @@ -0,0 +1,5 @@ +pr: 140477 +summary: "[ML] Add Mixedbread Rerank support to the Inference Plugin" +area: Machine Learning +type: enhancement +issues: [] diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java index b7fc69095350b..e290534a64e87 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java @@ -31,14 +31,14 @@ */ public abstract class MixedbreadModel extends RateLimitGroupingModel { private final SecureString apiKey; - private final MixedbreadRateLimitServiceSettings rateLimitServiceSettings; + private final RateLimitSettings rateLimitServiceSettings; protected URI uri; public MixedbreadModel( ModelConfigurations configurations, ModelSecrets secrets, @Nullable ApiKeySecrets apiKeySecrets, - MixedbreadRateLimitServiceSettings rateLimitServiceSettings, + RateLimitSettings rateLimitServiceSettings, URI uri ) { super(configurations, secrets); @@ -68,7 +68,7 @@ public SecureString apiKey() { return apiKey; } - public MixedbreadRateLimitServiceSettings rateLimitServiceSettings() { + public RateLimitSettings rateLimitServiceSettings() { return rateLimitServiceSettings; } @@ -79,7 +79,7 @@ public URI uri() { } public RateLimitSettings rateLimitSettings() { - return rateLimitServiceSettings.rateLimitSettings(); + return rateLimitServiceSettings; } public int rateLimitGroupingHash() { @@ -94,8 +94,4 @@ public void setURI(String newUri) { // swallow any error } } - - public URI baseUri() { - return uri; - } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java index dfc23253ef23c..54a8c9e9de624 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java @@ -11,6 +11,4 @@ public interface MixedbreadRateLimitServiceSettings { RateLimitSettings rateLimitSettings(); - - Integer windowSize(); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java index 226ff56957979..54559964e8d68 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java @@ -70,7 +70,7 @@ public MixedbreadRerankModel( new ModelConfigurations(modelId, TaskType.RERANK, MixedbreadService.NAME, serviceSettings, taskSettings), new ModelSecrets(secretSettings), secretSettings, - serviceSettings, + serviceSettings.rateLimitSettings(), Objects.requireNonNullElse( ServiceUtils.createOptionalUri(uri), buildUri(MixedbreadService.SERVICE_NAME, DEFAULT_URI_BUILDER::build) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java index b6760d5d08f9b..b053bc3c3bfdc 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java @@ -88,16 +88,10 @@ public String modelId() { return model; } - @Override public RateLimitSettings rateLimitSettings() { return 
rateLimitSettings; } - @Override - public Integer windowSize() { - return windowsSize; - } - @Override public String getWriteableName() { return NAME; diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java index 63bee718039e5..04f07488454a9 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java @@ -580,7 +580,7 @@ private static void assertCommonModelSettings( ) { assertThat(model.uri().toString(), is(url)); assertThat(model.getServiceSettings().modelId(), is(modelName)); - assertThat(model.rateLimitServiceSettings().rateLimitSettings(), is(rateLimitSettings)); + assertThat(model.rateLimitServiceSettings(), is(rateLimitSettings)); assertThat(model.apiKey().toString(), is(apiKey)); } From faa5ce847b26e5668d6bcb5a8521eb76922df52d Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 28 Jan 2026 07:00:06 +0000 Subject: [PATCH 12/48] [CI] Auto commit changes from spotless --- .../mixedbread/response/MixedbreadRerankResponseEntity.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java index 55a09971c2271..ccabc4e1fcd0e 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java @@ -149,8 +149,10 @@ private static Document parseDocument(XContentParser parser) throws IOException return new Document(null); } - throw new XContentParseException(parser.getTokenLocation(), - "Expected an object, string or null for document field, but got: " + token); + throw new XContentParseException( + parser.getTokenLocation(), + "Expected an object, string or null for document field, but got: " + token + ); } private record DocumentObject(String text) { From 36f799446c9a0bef9dc579aedab99482ee4ebcf5 Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Wed, 28 Jan 2026 11:25:01 +0100 Subject: [PATCH 13/48] Fix the test --- .../elasticsearch/xpack/inference/InferenceGetServicesIT.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java index 7c01cf3fc7fb4..635ceb715fbee 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java @@ -145,6 +145,7 @@ public void testGetServicesWithRerankTaskType() throws IOException { "elasticsearch", "googlevertexai", "jinaai", + "mixedbread", "nvidia", "openshift_ai", "test_reranking_service", From 87f09e5b43c47134c22866c0baada768090b53e3 Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Wed, 28 Jan 2026 15:13:45 +0100 Subject: [PATCH 14/48] Checkstyle fix --- .../mixedbread/rerank/MixedbreadRerankTaskSettings.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java index 852169b06a9b3..aacb6cb601668 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java @@ -59,7 +59,8 @@ public static MixedbreadRerankTaskSettings fromMap(Map map) { } /** - * Creates a new {@link MixedbreadRerankTaskSettings} by preferring non-null fields from the request settings over the original settings. + * Creates a new {@link MixedbreadRerankTaskSettings} + * by preferring non-null fields from the request settings over the original settings. * * @param originalSettings the settings stored as part of the inference entity configuration * @param requestTaskSettings the settings passed in within the task_settings field of the request From fa208ee5bf2d82d6888c11b565d61457947a4302 Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Wed, 28 Jan 2026 16:33:16 +0100 Subject: [PATCH 15/48] Fix the test --- .../elasticsearch/xpack/inference/InferenceGetServicesIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java index 635ceb715fbee..3747b281013d1 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java +++ 
b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java @@ -81,7 +81,8 @@ public void testGetServicesWithoutTaskType() throws IOException { "text_embedding_test_service", "voyageai", "watsonxai", - "amazon_sagemaker" + "amazon_sagemaker", + "mixedbread" ).toArray() ) ); From 5ed8e6a166f13db5b243d2f0863e91908310cca1 Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Thu, 29 Jan 2026 09:27:40 +0100 Subject: [PATCH 16/48] Clean up --- .../MixedbreadRateLimitServiceSettings.java | 14 -------------- .../rerank/MixedbreadRerankServiceSettings.java | 3 +-- 2 files changed, 1 insertion(+), 16 deletions(-) delete mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java deleted file mode 100644 index 54a8c9e9de624..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadRateLimitServiceSettings.java +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -package org.elasticsearch.xpack.inference.services.mixedbread; - -import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; - -public interface MixedbreadRateLimitServiceSettings { - RateLimitSettings rateLimitSettings(); -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java index b053bc3c3bfdc..5d2bf504a869a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java @@ -16,7 +16,6 @@ import org.elasticsearch.inference.ServiceSettings; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; -import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadRateLimitServiceSettings; import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadService; import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadUtils; import org.elasticsearch.xpack.inference.services.settings.FilteredXContentObject; @@ -30,7 +29,7 @@ import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalInteger; import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredString; -public class MixedbreadRerankServiceSettings extends FilteredXContentObject implements ServiceSettings, MixedbreadRateLimitServiceSettings { +public class MixedbreadRerankServiceSettings extends FilteredXContentObject implements ServiceSettings { public static final String NAME = "mixedbread_rerank_service_settings"; public static final String WINDOWS_SIZE = "windows_size"; From 
6c3bc8fef71b106cd859d26875cb3825ceb94b49 Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Thu, 29 Jan 2026 14:58:32 +0100 Subject: [PATCH 17/48] Clean up --- .../mixedbread/MixedbreadConstants.java | 4 ++ .../services/mixedbread/MixedbreadModel.java | 16 +------- .../rerank/MixedbreadRerankModel.java | 40 ++++++++++--------- .../mixedbread/MixedbreadServiceTests.java | 11 +++-- .../action/MixedbreadActionCreatorTests.java | 33 ++++----------- .../request/MixedbreadRerankRequestTests.java | 2 +- .../rerank/MixedbreadRerankModelTests.java | 24 +++++------ 7 files changed, 54 insertions(+), 76 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java index 6fa1fed2efd22..b4cc569324896 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java @@ -7,9 +7,13 @@ package org.elasticsearch.xpack.inference.services.mixedbread; +import org.apache.http.client.utils.URIBuilder; + public class MixedbreadConstants { + public static final String HOST = "api.mixedbread.com"; public static final String VERSION_1 = "v1"; public static final String RERANK_PATH = "rerank"; + public static URIBuilder DEFAULT_URI_BUILDER = new URIBuilder().setScheme("https").setHost(MixedbreadConstants.HOST); // common service settings fields public static final String MODEL_FIELD = "model"; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java index e290534a64e87..76c181a217885 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java @@ -21,7 +21,6 @@ import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; import java.net.URI; -import java.net.URISyntaxException; import java.util.Map; import java.util.Objects; @@ -38,14 +37,12 @@ public MixedbreadModel( ModelConfigurations configurations, ModelSecrets secrets, @Nullable ApiKeySecrets apiKeySecrets, - RateLimitSettings rateLimitServiceSettings, - URI uri + RateLimitSettings rateLimitServiceSettings ) { super(configurations, secrets); this.rateLimitServiceSettings = Objects.requireNonNull(rateLimitServiceSettings); apiKey = ServiceUtils.apiKey(apiKeySecrets); - this.uri = uri; } protected MixedbreadModel(MixedbreadModel model, TaskSettings taskSettings) { @@ -53,7 +50,6 @@ protected MixedbreadModel(MixedbreadModel model, TaskSettings taskSettings) { rateLimitServiceSettings = model.rateLimitServiceSettings(); apiKey = model.apiKey(); - uri = model.uri(); } protected MixedbreadModel(MixedbreadModel model, ServiceSettings serviceSettings) { @@ -61,7 +57,6 @@ protected MixedbreadModel(MixedbreadModel model, ServiceSettings serviceSettings rateLimitServiceSettings = model.rateLimitServiceSettings(); apiKey = model.apiKey(); - uri = model.uri(); } public SecureString apiKey() { @@ -85,13 +80,4 @@ public RateLimitSettings rateLimitSettings() { public int rateLimitGroupingHash() { return apiKey().hashCode(); } - - // Needed for testing only - public void setURI(String newUri) { - try { - this.uri = new URI(newUri); - } catch (URISyntaxException e) { - // swallow any error - } - } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java index 54559964e8d68..b80cefa9b9f9e 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java @@ -7,32 +7,29 @@ package org.elasticsearch.xpack.inference.services.mixedbread.rerank; -import org.apache.http.client.utils.URIBuilder; import org.elasticsearch.core.Nullable; import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.ModelSecrets; import org.elasticsearch.inference.TaskType; import org.elasticsearch.xpack.inference.external.action.ExecutableAction; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; -import org.elasticsearch.xpack.inference.services.ServiceUtils; import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants; import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadModel; import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadService; import org.elasticsearch.xpack.inference.services.mixedbread.action.MixedbreadActionVisitor; import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings; +import java.net.URI; +import java.net.URISyntaxException; import java.util.Map; -import java.util.Objects; import static org.elasticsearch.xpack.inference.external.request.RequestUtils.buildUri; public class MixedbreadRerankModel extends MixedbreadModel { - public static final String HOST = "api.mixedbread.com"; - public static final String VERSION_1 = "v1"; - - private static final URIBuilder DEFAULT_URI_BUILDER = new URIBuilder().setScheme("https") - .setHost(HOST) - .setPathSegments(VERSION_1, MixedbreadConstants.RERANK_PATH); + private URI uri = buildUri( + MixedbreadService.SERVICE_NAME, + 
MixedbreadConstants.DEFAULT_URI_BUILDER.setPathSegments(MixedbreadConstants.VERSION_1, MixedbreadConstants.RERANK_PATH)::build + ); public static MixedbreadRerankModel of(MixedbreadRerankModel model, Map taskSettings) { var requestTaskSettings = MixedbreadRerankTaskSettings.fromMap(taskSettings); @@ -53,8 +50,7 @@ public MixedbreadRerankModel( inferenceId, MixedbreadRerankServiceSettings.fromMap(serviceSettings, context), MixedbreadRerankTaskSettings.fromMap(taskSettings), - DefaultSecretSettings.fromMap(secrets), - null + DefaultSecretSettings.fromMap(secrets) ); } @@ -63,18 +59,13 @@ public MixedbreadRerankModel( String modelId, MixedbreadRerankServiceSettings serviceSettings, MixedbreadRerankTaskSettings taskSettings, - @Nullable DefaultSecretSettings secretSettings, - @Nullable String uri + @Nullable DefaultSecretSettings secretSettings ) { super( new ModelConfigurations(modelId, TaskType.RERANK, MixedbreadService.NAME, serviceSettings, taskSettings), new ModelSecrets(secretSettings), secretSettings, - serviceSettings.rateLimitSettings(), - Objects.requireNonNullElse( - ServiceUtils.createOptionalUri(uri), - buildUri(MixedbreadService.SERVICE_NAME, DEFAULT_URI_BUILDER::build) - ) + serviceSettings.rateLimitSettings() ); } @@ -101,6 +92,19 @@ public DefaultSecretSettings getSecretSettings() { return (DefaultSecretSettings) super.getSecretSettings(); } + public URI uri() { + return uri; + } + + // Needed for testing only + public void setURI(String newUri) { + try { + uri = new URI(newUri); + } catch (URISyntaxException e) { + // swallow any error + } + } + /** * Accepts a visitor to create an executable action. The returned action will not return documents in the response. * @param visitor Interface for creating {@link ExecutableAction} instances for Mixedbread models. 
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java index 04f07488454a9..8257e614093f7 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java @@ -235,7 +235,7 @@ public void testInfer_Rerank_UnauthorisedResponse() throws IOException { """; webServer.enqueue(new MockResponse().setResponseCode(401).setBody(responseJson)); - var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, "secret", "uri", 1024, false); + var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, "secret", 1024, false); model.setURI(getUrl(webServer)); PlainActionFuture listener = new PlainActionFuture<>(); @@ -293,7 +293,8 @@ public void testInfer_Rerank_Get_Response_NoReturnDocuments_NoTopN() throws IOEx try (var service = new MixedbreadService(senderFactory, createWithEmptySettings(threadPool), mockClusterServiceEmpty())) { webServer.enqueue(new MockResponse().setResponseCode(200).setBody(responseJson)); - var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, "secret", getUrl(webServer), null, false); + var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, "secret", null, false); + model.setURI(getUrl(webServer)); PlainActionFuture listener = new PlainActionFuture<>(); service.infer( model, @@ -369,7 +370,8 @@ public void testInfer_Rerank_Get_Response_ReturnDocumentsNull_NoTopN() throws IO try (var service = new MixedbreadService(senderFactory, createWithEmptySettings(threadPool), mockClusterServiceEmpty())) { webServer.enqueue(new MockResponse().setResponseCode(200).setBody(responseJson)); - var model = 
MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, "secret", getUrl(webServer), null, null); + var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, "secret", null, null); + model.setURI(getUrl(webServer)); PlainActionFuture listener = new PlainActionFuture<>(); service.infer( model, @@ -447,7 +449,8 @@ public void testInfer_Rerank_Get_Response_ReturnDocuments_TopN() throws IOExcept try (var service = new MixedbreadService(senderFactory, createWithEmptySettings(threadPool), mockClusterServiceEmpty())) { webServer.enqueue(new MockResponse().setResponseCode(200).setBody(responseJson)); - var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, "secret", getUrl(webServer), 3, true); + var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, "secret", 3, true); + model.setURI(getUrl(webServer)); PlainActionFuture listener = new PlainActionFuture<>(); service.infer( model, diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreatorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreatorTests.java index 98cec0c94ab5b..e5157308dc4f8 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreatorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreatorTests.java @@ -34,9 +34,7 @@ import static org.elasticsearch.xpack.inference.Utils.inferenceUtilityExecutors; import static org.elasticsearch.xpack.inference.Utils.mockClusterServiceEmpty; -import static org.elasticsearch.xpack.inference.external.http.Utils.getUrl; import static org.elasticsearch.xpack.inference.services.ServiceComponentsTests.createWithEmptySettings; -import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.is; import static 
org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.doAnswer; @@ -71,21 +69,11 @@ public void shutdown() throws IOException { webServer.close(); } - public void testExecute_ThrowsURISyntaxException_ForInvalidUrl() throws IOException { - try (var sender = mock(Sender.class)) { - var thrownException = expectThrows( - IllegalArgumentException.class, - () -> createAction("model", "secret", "^^", null, null, sender) - ); - MatcherAssert.assertThat(thrownException.getMessage(), containsString("unable to parse url [^^]")); - } - } - public void testExecute_ThrowsElasticsearchException() { var sender = mock(Sender.class); doThrow(new ElasticsearchException("failed")).when(sender).send(any(), any(), any(), any()); - var action = createAction("model", "secret", getUrl(webServer), null, null, sender); + var action = createAction("model", "secret", null, null, sender); ElasticsearchException thrownException = executeActionWithException(action); MatcherAssert.assertThat(thrownException.getMessage(), is("failed")); @@ -101,7 +89,7 @@ public void testExecute_ThrowsElasticsearchException_WhenSenderOnFailureIsCalled return Void.TYPE; }).when(sender).send(any(), any(), any(), any()); - var action = createAction("model", "secret", getUrl(webServer), null, null, sender); + var action = createAction("model", "secret", null, null, sender); ElasticsearchException thrownException = executeActionWithException(action); MatcherAssert.assertThat(thrownException.getMessage(), is(EXPECTED_EXCEPTION)); @@ -117,7 +105,7 @@ public void testExecute_ThrowsElasticsearchException_WhenSenderOnFailureIsCalled return Void.TYPE; }).when(sender).send(any(), any(), any(), any()); - var action = createAction("model", "secret", null, null, null, sender); + var action = createAction("model", "secret", null, null, sender); ElasticsearchException thrownException = executeActionWithException(action); MatcherAssert.assertThat(thrownException.getMessage(), is(EXPECTED_EXCEPTION)); @@ -127,7 +115,7 
@@ public void testExecute_ThrowsException() { var sender = mock(Sender.class); doThrow(new IllegalArgumentException("failed")).when(sender).send(any(), any(), any(), any()); - var action = createAction("model", "secret", getUrl(webServer), null, null, sender); + var action = createAction("model", "secret", null, null, sender); ElasticsearchException thrownException = executeActionWithException(action); MatcherAssert.assertThat(thrownException.getMessage(), is(EXPECTED_EXCEPTION)); @@ -137,7 +125,7 @@ public void testExecute_ThrowsExceptionWithNullUrl() { var sender = mock(Sender.class); doThrow(new IllegalArgumentException("failed")).when(sender).send(any(), any(), any(), any()); - var action = createAction("model", "secret", null, null, null, sender); + var action = createAction("model", "secret", null, null, sender); var thrownException = executeActionWithException(action); MatcherAssert.assertThat(thrownException.getMessage(), is(EXPECTED_EXCEPTION)); @@ -149,16 +137,9 @@ private static ElasticsearchException executeActionWithException(ExecutableActio return expectThrows(ElasticsearchException.class, () -> listener.actionGet(TIMEOUT)); } - private ExecutableAction createAction( - String modelName, - String apiKey, - String url, - Integer topN, - Boolean returnDocuments, - Sender sender - ) { + private ExecutableAction createAction(String modelName, String apiKey, Integer topN, Boolean returnDocuments, Sender sender) { var actionCreator = new MixedbreadActionCreator(sender, createWithEmptySettings(threadPool)); - var model = MixedbreadRerankModelTests.createModel(modelName, apiKey, url, topN, returnDocuments); + var model = MixedbreadRerankModelTests.createModel(modelName, apiKey, topN, returnDocuments); return actionCreator.create(model, null); } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java index 20d6356ffd335..9179617b097d0 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java @@ -66,7 +66,7 @@ private static MixedbreadRerankRequest createRequest( @Nullable Integer topN, @Nullable Boolean returnDocuments ) { - var rerankModel = MixedbreadRerankModelTests.createModel(modelId, API_KEY, null, null, null); + var rerankModel = MixedbreadRerankModelTests.createModel(modelId, API_KEY, null, null); return new MixedbreadRerankRequest(query, List.of(input), returnDocuments, topN, rerankModel); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java index 3d256b4cffade..d873f595e3556 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java @@ -26,23 +26,25 @@ public class MixedbreadRerankModelTests extends ESTestCase { public static final String API_KEY = "secret"; public void testConstructor_usesDefaultUrlWhenNull() { - var model = createModel(MODEL_ID, API_KEY, null, null, null); + var model = createModel(MODEL_ID, API_KEY, null, null); + model.setURI(DEFAULT_URL); assertThat(model.uri().toString(), is(DEFAULT_URL)); } public void testConstructor_usesUrlWhenSpecified() { - var model = createModel(MODEL_ID, API_KEY, CUSTOM_URL, null, null); + var model = 
createModel(MODEL_ID, API_KEY, null, null); + model.setURI(CUSTOM_URL); assertThat(model.uri().toString(), is(CUSTOM_URL)); } public void testOf_DoesNotOverrideAndModelRemainsEqual_WhenSettingsAreEmpty() { - var model = createModel(MODEL_ID, API_KEY, null, 10, true); + var model = createModel(MODEL_ID, API_KEY, 10, true); var overriddenModel = MixedbreadRerankModel.of(model, Map.of()); assertThat(overriddenModel, sameInstance(model)); } public void testOf_DoesNotOverrideAndModelRemainsEqual_WhenSettingsAreNull() { - var model = createModel(MODEL_ID, API_KEY, null, 10, true); + var model = createModel(MODEL_ID, API_KEY, 10, true); var overriddenModel = MixedbreadRerankModel.of(model, null); assertThat(overriddenModel, sameInstance(model)); } @@ -50,32 +52,31 @@ public void testOf_DoesNotOverrideAndModelRemainsEqual_WhenSettingsAreNull() { public void testOf_DoesNotOverrideAndModelRemainsEqual_WhenSettingsAreEqual() { var topN = randomNonNegativeInt(); var returnDocuments = randomBoolean(); - var model = createModel(MODEL_ID, API_KEY, null, topN, returnDocuments); + var model = createModel(MODEL_ID, API_KEY, topN, returnDocuments); var overriddenModel = MixedbreadRerankModel.of(model, getTaskSettingsMap(topN, returnDocuments)); assertThat(overriddenModel, sameInstance(model)); } public void testOf_SetsTopN_FromRequestTaskSettings_OverridingStoredTaskSettings() { - var model = createModel(MODEL_ID, API_KEY, null, 15, null); + var model = createModel(MODEL_ID, API_KEY, 15, null); var topNFromRequest = 10; var overriddenModel = MixedbreadRerankModel.of(model, getTaskSettingsMap(topNFromRequest, null)); - var expectedModel = createModel(MODEL_ID, API_KEY, null, topNFromRequest, null); + var expectedModel = createModel(MODEL_ID, API_KEY, topNFromRequest, null); assertThat(overriddenModel, is(expectedModel)); } public void testOf_SetsReturnDocuments_FromRequestTaskSettings() { var topN = 15; - var model = createModel(MODEL_ID, API_KEY, null, topN, true); + var model = 
createModel(MODEL_ID, API_KEY, topN, true); var returnDocumentsFromRequest = false; var overriddenModel = MixedbreadRerankModel.of(model, getTaskSettingsMap(null, returnDocumentsFromRequest)); - var expectedModel = createModel(MODEL_ID, API_KEY, null, topN, returnDocumentsFromRequest); + var expectedModel = createModel(MODEL_ID, API_KEY, topN, returnDocumentsFromRequest); assertThat(overriddenModel, is(expectedModel)); } public static MixedbreadRerankModel createModel( String model, String apiKey, - String uri, @Nullable Integer topN, @Nullable Boolean returnDocuments ) { @@ -83,8 +84,7 @@ public static MixedbreadRerankModel createModel( model, new MixedbreadRerankServiceSettings(model, null, null), new MixedbreadRerankTaskSettings(topN, returnDocuments), - new DefaultSecretSettings(new SecureString(apiKey.toCharArray())), - uri + new DefaultSecretSettings(new SecureString(apiKey.toCharArray())) ); } } From 81f9aca452da217d3d13326d4bf9879b495d7097 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 29 Jan 2026 14:15:11 +0000 Subject: [PATCH 18/48] [CI] Update transport version definitions --- .../definitions/referable/ml_inference_mixedbread_added.csv | 2 +- server/src/main/resources/transport/upper_bounds/9.4.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv b/server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv index 661db7649ee83..4534f7ecd57a0 100644 --- a/server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv +++ b/server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv @@ -1 +1 @@ -9267000 +9269000 diff --git a/server/src/main/resources/transport/upper_bounds/9.4.csv b/server/src/main/resources/transport/upper_bounds/9.4.csv index 41a6dca5606a9..8ac78bc34b783 100644 --- 
a/server/src/main/resources/transport/upper_bounds/9.4.csv +++ b/server/src/main/resources/transport/upper_bounds/9.4.csv @@ -1 +1 @@ -nodes_write_load_hotspotting_in_cluster_info,9268000 +ml_inference_mixedbread_added,9269000 From e447752adea2c6925db6a7a8f783d5668965c5af Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Thu, 29 Jan 2026 17:59:50 +0100 Subject: [PATCH 19/48] ci: retrigger From 68108cf92423a417808d5b9842d620ac2bf51f4b Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 30 Jan 2026 07:42:40 +0000 Subject: [PATCH 20/48] [CI] Update transport version definitions --- .../definitions/referable/ml_inference_mixedbread_added.csv | 2 +- server/src/main/resources/transport/upper_bounds/9.4.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv b/server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv index 4534f7ecd57a0..8988ac60abe1d 100644 --- a/server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv +++ b/server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv @@ -1 +1 @@ -9269000 +9270000 diff --git a/server/src/main/resources/transport/upper_bounds/9.4.csv b/server/src/main/resources/transport/upper_bounds/9.4.csv index 6809ffa85ee72..57e96144fae1a 100644 --- a/server/src/main/resources/transport/upper_bounds/9.4.csv +++ b/server/src/main/resources/transport/upper_bounds/9.4.csv @@ -1 +1 @@ -esql_view_queries,9269000 +ml_inference_mixedbread_added,9270000 From 2ab34e0f487301e0063510932b9b6979bdd788bf Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Sun, 1 Feb 2026 21:43:50 +0100 Subject: [PATCH 21/48] Address comments and refactor --- docs/changelog/140477.yaml | 2 +- .../http/retry/BaseResponseHandler.java | 4 + .../GoogleAiStudioResponseHandler.java | 7 -- .../GoogleVertexAiResponseHandler.java | 6 -- 
.../ibmwatsonx/IbmWatsonxResponseHandler.java | 6 -- .../mixedbread/MixedbreadConstants.java | 33 ------- .../services/mixedbread/MixedbreadModel.java | 13 +-- .../mixedbread/MixedbreadService.java | 2 +- .../services/mixedbread/MixedbreadUtils.java | 23 ++++- .../action/MixedbreadActionCreator.java | 6 +- .../{ => rerank}/MixedbreadRerankRequest.java | 15 ++-- .../MixedbreadRerankRequestEntity.java | 23 ++--- .../rerank/MixedbreadRerankModel.java | 45 ++++------ .../MixedbreadRerankResponseHandler.java | 22 ----- .../MixedbreadRerankServiceSettings.java | 10 +-- .../rerank/MixedbreadRerankTaskSettings.java | 24 ++---- .../rerank/MixedbreadResponseHandler.java | 6 -- .../MixedbreadRerankResponseEntity.java | 2 + .../openai/OpenAiResponseHandler.java | 5 -- .../mixedbread/MixedbreadServiceTests.java | 86 ++++++++----------- .../action/MixedbreadActionCreatorTests.java | 51 ++--------- .../MixedbreadRerankRequestEntityTests.java | 1 + .../request/MixedbreadRerankRequestTests.java | 17 ++-- .../rerank/MixedbreadRerankModelTests.java | 30 +++---- .../MixedbreadRerankTaskSettingsTests.java | 56 ++++++------ .../MixedbreadRerankResponseEntityTests.java | 4 +- 26 files changed, 183 insertions(+), 316 deletions(-) delete mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java rename x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/{ => rerank}/MixedbreadRerankRequest.java (88%) rename x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/{ => rerank}/MixedbreadRerankRequestEntity.java (62%) delete mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankResponseHandler.java diff --git a/docs/changelog/140477.yaml b/docs/changelog/140477.yaml index 646b31384722b..66de374507108 100644 --- a/docs/changelog/140477.yaml +++ 
b/docs/changelog/140477.yaml @@ -1,5 +1,5 @@ pr: 140477 -summary: "[ML] Add Mixedbread Rerank support to the Inference Plugin" +summary: "[Inference API] Add Mixedbread Rerank support to the Inference Plugin" area: Machine Learning type: enhancement issues: [] diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/BaseResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/BaseResponseHandler.java index 7a28084dace09..ed1e9e80eb7fc 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/BaseResponseHandler.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/retry/BaseResponseHandler.java @@ -118,4 +118,8 @@ public static RestStatus toRestStatus(int statusCode) { return code == null ? RestStatus.BAD_REQUEST : code; } + + protected static String resourceNotFoundError(Request request) { + return format("Resource not found at [%s]", request.getURI()); + } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioResponseHandler.java index 32a436e9e97cd..c71c21d078427 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioResponseHandler.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioResponseHandler.java @@ -23,8 +23,6 @@ import java.io.IOException; import java.util.concurrent.Flow; -import static org.elasticsearch.core.Strings.format; - public class GoogleAiStudioResponseHandler extends BaseResponseHandler { static final String GOOGLE_AI_STUDIO_UNAVAILABLE = "The Google AI Studio service may be temporarily overloaded or down"; @@ 
-82,10 +80,6 @@ protected void checkForFailureStatusCode(Request request, HttpResult result) thr } } - private static String resourceNotFoundError(Request request) { - return format("Resource not found at [%s]", request.getURI()); - } - @Override public InferenceServiceResults parseResult(Request request, Flow.Publisher flow) { var serverSentEventProcessor = new ServerSentEventProcessor(new ServerSentEventParser()); @@ -94,5 +88,4 @@ public InferenceServiceResults parseResult(Request request, Flow.Publisher flow) { var serverSentEventProcessor = new ServerSentEventProcessor(new ServerSentEventParser()); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxResponseHandler.java index 82e05749967e3..19156d905ab66 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxResponseHandler.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxResponseHandler.java @@ -14,8 +14,6 @@ import org.elasticsearch.xpack.inference.external.request.Request; import org.elasticsearch.xpack.inference.services.ibmwatsonx.response.IbmWatsonxErrorResponseEntity; -import static org.elasticsearch.core.Strings.format; - public class IbmWatsonxResponseHandler extends BaseResponseHandler { public IbmWatsonxResponseHandler(String requestType, ResponseParser parseFunction) { super(requestType, parseFunction, IbmWatsonxErrorResponseEntity::fromResponse); @@ -53,8 +51,4 @@ protected void checkForFailureStatusCode(Request request, HttpResult result) thr throw new RetryException(false, buildError(UNSUCCESSFUL, request, result)); } } - - private static String resourceNotFoundError(Request request) { - return format("Resource not found at [%s]", request.getURI()); - } } diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java deleted file mode 100644 index b4cc569324896..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadConstants.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.inference.services.mixedbread; - -import org.apache.http.client.utils.URIBuilder; - -public class MixedbreadConstants { - public static final String HOST = "api.mixedbread.com"; - public static final String VERSION_1 = "v1"; - public static final String RERANK_PATH = "rerank"; - public static URIBuilder DEFAULT_URI_BUILDER = new URIBuilder().setScheme("https").setHost(MixedbreadConstants.HOST); - - // common service settings fields - public static final String MODEL_FIELD = "model"; - - public static final String INPUT_FIELD = "input"; - - // rerank task settings fields - public static final String QUERY_FIELD = "query"; - - public static final String DOCUMENTS_FIELD = "documents"; - - // rerank task settings fields - public static final String RETURN_DOCUMENTS_FIELD = "return_input"; - public static final String TOP_K_FIELD = "top_k"; - - private MixedbreadConstants() {} -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java index 76c181a217885..75ffb0be44dde 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java @@ -11,7 +11,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.ModelSecrets; -import org.elasticsearch.inference.ServiceSettings; import org.elasticsearch.inference.TaskSettings; import org.elasticsearch.xpack.inference.external.action.ExecutableAction; import org.elasticsearch.xpack.inference.services.RateLimitGroupingModel; @@ -37,12 +36,14 @@ public MixedbreadModel( ModelConfigurations configurations, ModelSecrets secrets, @Nullable ApiKeySecrets apiKeySecrets, - RateLimitSettings rateLimitServiceSettings + RateLimitSettings rateLimitServiceSettings, + URI uri ) { super(configurations, secrets); this.rateLimitServiceSettings = Objects.requireNonNull(rateLimitServiceSettings); apiKey = ServiceUtils.apiKey(apiKeySecrets); + this.uri = uri; } protected MixedbreadModel(MixedbreadModel model, TaskSettings taskSettings) { @@ -50,13 +51,7 @@ protected MixedbreadModel(MixedbreadModel model, TaskSettings taskSettings) { rateLimitServiceSettings = model.rateLimitServiceSettings(); apiKey = model.apiKey(); - } - - protected MixedbreadModel(MixedbreadModel model, ServiceSettings serviceSettings) { - super(model, serviceSettings); - - rateLimitServiceSettings = model.rateLimitServiceSettings(); - apiKey = model.apiKey(); + uri = model.uri(); } public SecureString apiKey() { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java index 8108fb8b6250f..5558f134e4af6 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java @@ -259,7 +259,7 @@ protected void validateInputType(InputType inputType, Model model, ValidationExc @Override public TransportVersion getMinimalSupportedVersion() { - return MixedbreadUtils.ML_INFERENCE_MIXEDBREAD_ADDED; + return MixedbreadUtils.INFERENCE_MIXEDBREAD_ADDED; } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadUtils.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadUtils.java index 38d363d75b5e2..a8de119445ac1 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadUtils.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadUtils.java @@ -7,17 +7,36 @@ package org.elasticsearch.xpack.inference.services.mixedbread; +import org.apache.http.client.utils.URIBuilder; import org.elasticsearch.TransportVersion; /** * Utility class for Mixedbread related version checks. 
*/ public final class MixedbreadUtils { + public static final String HOST = "api.mixedbread.com"; + public static final String VERSION_1 = "v1"; + public static final String RERANK_PATH = "reranking"; + public static URIBuilder DEFAULT_URI_BUILDER = new URIBuilder().setScheme("https").setHost(MixedbreadUtils.HOST); + + // common service settings fields + public static final String MODEL_FIELD = "model"; + + public static final String INPUT_FIELD = "input"; + + // rerank task settings fields + public static final String QUERY_FIELD = "query"; + + public static final String DOCUMENTS_FIELD = "documents"; + + // rerank task settings fields + public static final String RETURN_DOCUMENTS_FIELD = "return_input"; + public static final String TOP_K_FIELD = "top_k"; /** * TransportVersion indicating when Mixedbread features were added. */ - public static final TransportVersion ML_INFERENCE_MIXEDBREAD_ADDED = TransportVersion.fromName("ml_inference_mixedbread_added"); + public static final TransportVersion INFERENCE_MIXEDBREAD_ADDED = TransportVersion.fromName("ml_inference_mixedbread_added"); /** * Checks if the given TransportVersion supports Mixedbread features. 
@@ -26,7 +45,7 @@ public final class MixedbreadUtils { * @return true if Mixedbread features are supported, false otherwise */ public static boolean supportsMixedbread(TransportVersion version) { - return version.supports(ML_INFERENCE_MIXEDBREAD_ADDED); + return version.supports(INFERENCE_MIXEDBREAD_ADDED); } /** diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreator.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreator.java index e01e3e33991cd..e54774ab2b576 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreator.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreator.java @@ -14,9 +14,9 @@ import org.elasticsearch.xpack.inference.external.http.sender.QueryAndDocsInputs; import org.elasticsearch.xpack.inference.external.http.sender.Sender; import org.elasticsearch.xpack.inference.services.ServiceComponents; -import org.elasticsearch.xpack.inference.services.mixedbread.request.MixedbreadRerankRequest; +import org.elasticsearch.xpack.inference.services.mixedbread.request.rerank.MixedbreadRerankRequest; import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModel; -import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankResponseHandler; +import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadResponseHandler; import org.elasticsearch.xpack.inference.services.mixedbread.response.MixedbreadRerankResponseEntity; import java.util.Map; @@ -27,7 +27,7 @@ public class MixedbreadActionCreator implements MixedbreadActionVisitor { private static final String RERANK_ERROR_PREFIX = "Mixedbread rerank"; - private static final ResponseHandler RERANK_HANDLER = new 
MixedbreadRerankResponseHandler( + private static final ResponseHandler RERANK_HANDLER = new MixedbreadResponseHandler( "mixedbread rerank", (request, response) -> MixedbreadRerankResponseEntity.fromResponse(response) ); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequest.java similarity index 88% rename from x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequest.java rename to x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequest.java index f9d6e6fdf1126..592721220561d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequest.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequest.java @@ -5,7 +5,7 @@ * 2.0. 
*/ -package org.elasticsearch.xpack.inference.services.mixedbread.request; +package org.elasticsearch.xpack.inference.services.mixedbread.request.rerank; import org.apache.http.HttpHeaders; import org.apache.http.client.methods.HttpPost; @@ -16,7 +16,6 @@ import org.elasticsearch.xpack.inference.external.request.HttpRequest; import org.elasticsearch.xpack.inference.external.request.Request; import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModel; -import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettings; import java.net.URI; import java.nio.charset.StandardCharsets; @@ -31,7 +30,6 @@ public class MixedbreadRerankRequest implements Request { private final List input; private final Boolean returnDocuments; private final Integer topN; - private final MixedbreadRerankTaskSettings taskSettings; public MixedbreadRerankRequest( String query, @@ -44,7 +42,7 @@ public MixedbreadRerankRequest( this.query = Objects.requireNonNull(query); this.returnDocuments = returnDocuments; this.topN = topN; - taskSettings = model.getTaskSettings(); + model.getTaskSettings(); this.model = Objects.requireNonNull(model); } @@ -53,7 +51,14 @@ public HttpRequest createHttpRequest() { ByteArrayEntity byteEntity = new ByteArrayEntity( Strings.toString( - new MixedbreadRerankRequestEntity(model.getServiceSettings().modelId(), query, input, topN, returnDocuments, taskSettings) + new MixedbreadRerankRequestEntity( + model.getServiceSettings().modelId(), + query, + input, + topN, + returnDocuments, + model.getTaskSettings() + ) ).getBytes(StandardCharsets.UTF_8) ); httpPost.setEntity(byteEntity); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequestEntity.java similarity index 62% rename from 
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntity.java rename to x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequestEntity.java index f1a856255ff2d..6bec061c135b4 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntity.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequestEntity.java @@ -5,23 +5,18 @@ * 2.0. */ -package org.elasticsearch.xpack.inference.services.mixedbread.request; +package org.elasticsearch.xpack.inference.services.mixedbread.request.rerank; import org.elasticsearch.core.Nullable; import org.elasticsearch.xcontent.ToXContentObject; import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadUtils; import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettings; import java.io.IOException; import java.util.List; import java.util.Objects; -import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.INPUT_FIELD; -import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.MODEL_FIELD; -import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.QUERY_FIELD; -import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.RETURN_DOCUMENTS_FIELD; -import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants.TOP_K_FIELD; - public record MixedbreadRerankRequestEntity( String model, String query, @@ -42,20 +37,20 @@ public record MixedbreadRerankRequestEntity( public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); - 
builder.field(MODEL_FIELD, model); - builder.field(QUERY_FIELD, query); - builder.field(INPUT_FIELD, input); + builder.field(MixedbreadUtils.MODEL_FIELD, model); + builder.field(MixedbreadUtils.QUERY_FIELD, query); + builder.field(MixedbreadUtils.INPUT_FIELD, input); if (topN != null) { - builder.field(TOP_K_FIELD, topN); + builder.field(MixedbreadUtils.TOP_K_FIELD, topN); } else if (taskSettings.getTopNDocumentsOnly() != null) { - builder.field(TOP_K_FIELD, taskSettings.getTopNDocumentsOnly()); + builder.field(MixedbreadUtils.TOP_K_FIELD, taskSettings.getTopNDocumentsOnly()); } if (returnDocuments != null) { - builder.field(RETURN_DOCUMENTS_FIELD, returnDocuments); + builder.field(MixedbreadUtils.RETURN_DOCUMENTS_FIELD, returnDocuments); } else if (taskSettings.getReturnDocuments() != null) { - builder.field(RETURN_DOCUMENTS_FIELD, taskSettings.getReturnDocuments()); + builder.field(MixedbreadUtils.RETURN_DOCUMENTS_FIELD, taskSettings.getReturnDocuments()); } builder.endObject(); return builder; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java index b80cefa9b9f9e..0d3fa6dd3516a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java @@ -13,24 +13,19 @@ import org.elasticsearch.inference.TaskType; import org.elasticsearch.xpack.inference.external.action.ExecutableAction; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; -import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadConstants; +import org.elasticsearch.xpack.inference.services.ServiceUtils; import 
org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadModel; import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadService; +import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadUtils; import org.elasticsearch.xpack.inference.services.mixedbread.action.MixedbreadActionVisitor; import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings; -import java.net.URI; -import java.net.URISyntaxException; import java.util.Map; +import java.util.Objects; import static org.elasticsearch.xpack.inference.external.request.RequestUtils.buildUri; public class MixedbreadRerankModel extends MixedbreadModel { - private URI uri = buildUri( - MixedbreadService.SERVICE_NAME, - MixedbreadConstants.DEFAULT_URI_BUILDER.setPathSegments(MixedbreadConstants.VERSION_1, MixedbreadConstants.RERANK_PATH)::build - ); - public static MixedbreadRerankModel of(MixedbreadRerankModel model, Map taskSettings) { var requestTaskSettings = MixedbreadRerankTaskSettings.fromMap(taskSettings); if (requestTaskSettings.isEmpty() || requestTaskSettings.equals(model.getTaskSettings())) { @@ -50,22 +45,31 @@ public MixedbreadRerankModel( inferenceId, MixedbreadRerankServiceSettings.fromMap(serviceSettings, context), MixedbreadRerankTaskSettings.fromMap(taskSettings), - DefaultSecretSettings.fromMap(secrets) + DefaultSecretSettings.fromMap(secrets), + null ); } // should only be used for testing - public MixedbreadRerankModel( + MixedbreadRerankModel( String modelId, MixedbreadRerankServiceSettings serviceSettings, MixedbreadRerankTaskSettings taskSettings, - @Nullable DefaultSecretSettings secretSettings + @Nullable DefaultSecretSettings secretSettings, + @Nullable String uri ) { super( new ModelConfigurations(modelId, TaskType.RERANK, MixedbreadService.NAME, serviceSettings, taskSettings), new ModelSecrets(secretSettings), secretSettings, - serviceSettings.rateLimitSettings() + serviceSettings.rateLimitSettings(), + Objects.requireNonNullElse( + 
ServiceUtils.createOptionalUri(uri), + buildUri( + MixedbreadService.SERVICE_NAME, + MixedbreadUtils.DEFAULT_URI_BUILDER.setPathSegments(MixedbreadUtils.VERSION_1, MixedbreadUtils.RERANK_PATH)::build + ) + ) ); } @@ -73,10 +77,6 @@ private MixedbreadRerankModel(MixedbreadRerankModel model, MixedbreadRerankTaskS super(model, taskSettings); } - public MixedbreadRerankModel(MixedbreadRerankModel model, MixedbreadRerankServiceSettings serviceSettings) { - super(model, serviceSettings); - } - @Override public MixedbreadRerankServiceSettings getServiceSettings() { return (MixedbreadRerankServiceSettings) super.getServiceSettings(); @@ -92,19 +92,6 @@ public DefaultSecretSettings getSecretSettings() { return (DefaultSecretSettings) super.getSecretSettings(); } - public URI uri() { - return uri; - } - - // Needed for testing only - public void setURI(String newUri) { - try { - uri = new URI(newUri); - } catch (URISyntaxException e) { - // swallow any error - } - } - /** * Accepts a visitor to create an executable action. The returned action will not return documents in the response. * @param visitor Interface for creating {@link ExecutableAction} instances for Mixedbread models. diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankResponseHandler.java deleted file mode 100644 index a17f19238f291..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankResponseHandler.java +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -package org.elasticsearch.xpack.inference.services.mixedbread.rerank; - -import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser; - -public class MixedbreadRerankResponseHandler extends MixedbreadResponseHandler { - /** - * Constructs a new MixedbreadRerankResponseHandler with the specified request type and response parser. - * - * @param requestType the type of request this handler will process - * @param parseFunction the function to parse the response - */ - public MixedbreadRerankResponseHandler(String requestType, ResponseParser parseFunction) { - super(requestType, parseFunction); - } -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java index 5d2bf504a869a..1bd04827a89b6 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java @@ -35,7 +35,7 @@ public class MixedbreadRerankServiceSettings extends FilteredXContentObject impl public static final String WINDOWS_SIZE = "windows_size"; /** - * 100 req / min + * Free subscription tier 100 req / min * Rate Limiting. 
*/ public static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(100); @@ -66,11 +66,7 @@ public static MixedbreadRerankServiceSettings fromMap(Map map, C private final RateLimitSettings rateLimitSettings; private final Integer windowsSize; - public MixedbreadRerankServiceSettings( - @Nullable String model, - @Nullable RateLimitSettings rateLimitSettings, - @Nullable Integer windowsSize - ) { + public MixedbreadRerankServiceSettings(String model, @Nullable RateLimitSettings rateLimitSettings, @Nullable Integer windowsSize) { this.model = model; this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS); this.windowsSize = Objects.requireNonNullElse(windowsSize, DEFAULT_WINDOWS_SIZE); @@ -99,7 +95,7 @@ public String getWriteableName() { @Override public TransportVersion getMinimalSupportedVersion() { assert false : "should never be called when supportsVersion is used"; - return MixedbreadUtils.ML_INFERENCE_MIXEDBREAD_ADDED; + return MixedbreadUtils.INFERENCE_MIXEDBREAD_ADDED; } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java index aacb6cb601668..3d31ade1944a1 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java @@ -28,7 +28,7 @@ public class MixedbreadRerankTaskSettings implements TaskSettings { public static final String NAME = "mixedbread_rerank_task_settings"; public static final String RETURN_DOCUMENTS = "return_documents"; - public static final String TOP_N_DOCS_ONLY = "top_n"; + public static final String TOP_N = "top_n"; 
public static final MixedbreadRerankTaskSettings EMPTY_SETTINGS = new MixedbreadRerankTaskSettings(null, null); @@ -40,12 +40,7 @@ public static MixedbreadRerankTaskSettings fromMap(Map map) { } Boolean returnDocuments = extractOptionalBoolean(map, RETURN_DOCUMENTS, validationException); - Integer topNDocumentsOnly = extractOptionalPositiveInteger( - map, - TOP_N_DOCS_ONLY, - ModelConfigurations.TASK_SETTINGS, - validationException - ); + Integer topNDocumentsOnly = extractOptionalPositiveInteger(map, TOP_N, ModelConfigurations.TASK_SETTINGS, validationException); if (validationException.validationErrors().isEmpty() == false) { throw validationException; @@ -55,7 +50,7 @@ public static MixedbreadRerankTaskSettings fromMap(Map map) { return EMPTY_SETTINGS; } - return of(topNDocumentsOnly, returnDocuments); + return new MixedbreadRerankTaskSettings(topNDocumentsOnly, returnDocuments); } /** @@ -70,6 +65,9 @@ public static MixedbreadRerankTaskSettings of( MixedbreadRerankTaskSettings originalSettings, MixedbreadRerankTaskSettings requestTaskSettings ) { + if (requestTaskSettings.isEmpty() || originalSettings.equals(requestTaskSettings)) { + return originalSettings; + } return new MixedbreadRerankTaskSettings( requestTaskSettings.getTopNDocumentsOnly() != null ? 
requestTaskSettings.getTopNDocumentsOnly() @@ -80,10 +78,6 @@ public static MixedbreadRerankTaskSettings of( ); } - public static MixedbreadRerankTaskSettings of(Integer topNDocumentsOnly, Boolean returnDocuments) { - return new MixedbreadRerankTaskSettings(topNDocumentsOnly, returnDocuments); - } - private final Integer topNDocumentsOnly; private final Boolean returnDocuments; @@ -105,7 +99,7 @@ public boolean isEmpty() { public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); if (topNDocumentsOnly != null) { - builder.field(TOP_N_DOCS_ONLY, topNDocumentsOnly); + builder.field(TOP_N, topNDocumentsOnly); } if (returnDocuments != null) { builder.field(RETURN_DOCUMENTS, returnDocuments); @@ -122,7 +116,7 @@ public String getWriteableName() { @Override public TransportVersion getMinimalSupportedVersion() { assert false : "should never be called when supportsVersion is used"; - return MixedbreadUtils.ML_INFERENCE_MIXEDBREAD_ADDED; + return MixedbreadUtils.INFERENCE_MIXEDBREAD_ADDED; } @Override @@ -158,7 +152,7 @@ public Boolean getReturnDocuments() { } @Override - public TaskSettings updatedTaskSettings(Map newSettings) { + public MixedbreadRerankTaskSettings updatedTaskSettings(Map newSettings) { MixedbreadRerankTaskSettings updatedSettings = MixedbreadRerankTaskSettings.fromMap(new HashMap<>(newSettings)); return MixedbreadRerankTaskSettings.of(this, updatedSettings); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadResponseHandler.java index 342a7b0c451e5..92ab4d66e52ba 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadResponseHandler.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadResponseHandler.java @@ -14,8 +14,6 @@ import org.elasticsearch.xpack.inference.external.http.retry.RetryException; import org.elasticsearch.xpack.inference.external.request.Request; -import static org.elasticsearch.core.Strings.format; - public class MixedbreadResponseHandler extends BaseResponseHandler { private static final String FORBIDDEN = "Valid credentials but insufficient permissions for this resource."; private static final String PAYMENT_ERROR_MESSAGE = "Insufficient balance. Top up your account to continue."; @@ -63,8 +61,4 @@ protected void checkForFailureStatusCode(Request request, HttpResult result) thr throw new RetryException(false, buildError(UNSUCCESSFUL, request, result)); } } - - private static String resourceNotFoundError(Request request) { - return format("Resource not found at [%s]", request.getURI()); - } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java index ccabc4e1fcd0e..f18c4ff156df3 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java @@ -48,6 +48,7 @@ public class MixedbreadRerankResponseEntity { * } *

* The response will look like (without whitespace): + *

      *{
      *     "usage": {
      *         "prompt_tokens": 162,
@@ -79,6 +80,7 @@ public class MixedbreadRerankResponseEntity {
      *     "top_k": 3,
      *     "return_input": false
      * }
+     * 
* Parses the response from a Mixedbread rerank request and returns the results. diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiResponseHandler.java index 9f7f32c366bd1..5151fd511a3a5 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiResponseHandler.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiResponseHandler.java @@ -24,7 +24,6 @@ import java.util.concurrent.Flow; import java.util.function.Function; -import static org.elasticsearch.core.Strings.format; import static org.elasticsearch.xpack.inference.external.http.retry.ResponseHandlerUtils.getFirstHeaderOrUnknown; public class OpenAiResponseHandler extends BaseResponseHandler { @@ -105,10 +104,6 @@ protected RetryException buildExceptionHandlingContentTooLarge(Request request, return new ContentTooLargeException(buildError(CONTENT_TOO_LARGE, request, result)); } - private static String resourceNotFoundError(Request request) { - return format("Resource not found at [%s]", request.getURI()); - } - protected RetryException buildExceptionHandling429(Request request, HttpResult result) { return new RetryException(true, buildError(buildRateLimitErrorMessage(result), request, result)); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java index 8257e614093f7..fa6887c2a488b 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java @@ 
-18,7 +18,6 @@ import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceResults; import org.elasticsearch.inference.Model; -import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.RerankingInferenceService; import org.elasticsearch.inference.TaskType; import org.elasticsearch.test.http.MockResponse; @@ -27,6 +26,7 @@ import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.core.inference.action.InferenceAction; +import org.elasticsearch.xpack.inference.Utils; import org.elasticsearch.xpack.inference.external.http.HttpClientManager; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSenderTests; @@ -65,9 +65,16 @@ import static org.mockito.Mockito.mock; public class MixedbreadServiceTests extends InferenceServiceTestCase { + public static final Boolean RETURN_DOCUMENTS_TRUE = true; + public static final Boolean RETURN_DOCUMENTS_FALSE = false; + public static final String DEFAULT_RERANK_URL = "https://api.mixedbread.com/v1/reranking"; + private static final String INFERENCE_ENTITY_ID_VALUE = "id"; - public static final String DEFAULT_RERANK_URL = "https://api.mixedbread.com/v1/rerank"; private static final String MODEL_NAME_VALUE = "modelName"; + private static final String API_KEY = "secret"; + private static final String QUERY_VALUE = "query"; + private static final Integer TOP_N = 3; + private static final Boolean STREAM = false; private static final List INPUT = List.of("candidate1", "candidate2", "candidate3"); private final MockWebServer webServer = new MockWebServer(); private ThreadPool threadPool; @@ -100,7 +107,7 @@ public void testParseRequestConfig_createsRerankModel() throws IOException { service.parseRequestConfig( INFERENCE_ENTITY_ID_VALUE, TaskType.RERANK, - getRequestConfigMap( + 
Utils.getRequestConfigMap( MixedbreadRerankServiceSettingsTests.getServiceSettingsMap(modelName, requestsPerMinute), MixedbreadRerankTaskSettingsTests.getTaskSettingsMap(topN, returnDocuments), getSecretSettingsMap(apiKey) @@ -128,7 +135,11 @@ public void testParseRequestConfig_onlyRequiredSettings_createsRerankModel() thr service.parseRequestConfig( INFERENCE_ENTITY_ID_VALUE, TaskType.RERANK, - getRequestConfigMap(MixedbreadRerankServiceSettingsTests.getServiceSettingsMap(modelName), getSecretSettingsMap(apiKey)), + Utils.getRequestConfigMap( + MixedbreadRerankServiceSettingsTests.getServiceSettingsMap(modelName), + Map.of(), + getSecretSettingsMap(apiKey) + ), modelListener ); @@ -235,17 +246,16 @@ public void testInfer_Rerank_UnauthorisedResponse() throws IOException { """; webServer.enqueue(new MockResponse().setResponseCode(401).setBody(responseJson)); - var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, "secret", 1024, false); - model.setURI(getUrl(webServer)); + var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, API_KEY, TOP_N, RETURN_DOCUMENTS_FALSE, getUrl(webServer)); PlainActionFuture listener = new PlainActionFuture<>(); service.infer( model, - "query", + QUERY_VALUE, null, null, List.of("candidate1", "candidate2"), - false, + STREAM, new HashMap<>(), null, InferenceAction.Request.DEFAULT_TIMEOUT, @@ -259,7 +269,7 @@ public void testInfer_Rerank_UnauthorisedResponse() throws IOException { } } - public void testInfer_Rerank_Get_Response_NoReturnDocuments_NoTopN() throws IOException { + public void testInfer_Rerank_NoReturnDocuments_NoTopN() throws IOException { String responseJson = """ { "usage": { @@ -293,16 +303,15 @@ public void testInfer_Rerank_Get_Response_NoReturnDocuments_NoTopN() throws IOEx try (var service = new MixedbreadService(senderFactory, createWithEmptySettings(threadPool), mockClusterServiceEmpty())) { webServer.enqueue(new MockResponse().setResponseCode(200).setBody(responseJson)); - var model = 
MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, "secret", null, false); - model.setURI(getUrl(webServer)); + var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, API_KEY, null, RETURN_DOCUMENTS_FALSE, getUrl(webServer)); PlainActionFuture listener = new PlainActionFuture<>(); service.infer( model, - "query", + QUERY_VALUE, null, null, INPUT, - false, + STREAM, new HashMap<>(), null, InferenceAction.Request.DEFAULT_TIMEOUT, @@ -330,11 +339,14 @@ public void testInfer_Rerank_Get_Response_NoReturnDocuments_NoTopN() throws IOEx assertThat(webServer.requests().getFirst().getHeader(HttpHeaders.AUTHORIZATION), equalTo("Bearer secret")); var requestMap = entityAsMap(webServer.requests().getFirst().getBody()); - assertThat(requestMap, is(Map.of("query", "query", "input", INPUT, "model", MODEL_NAME_VALUE, "return_input", false))); + assertThat( + requestMap, + is(Map.of("query", QUERY_VALUE, "input", INPUT, "model", MODEL_NAME_VALUE, "return_input", RETURN_DOCUMENTS_FALSE)) + ); } } - public void testInfer_Rerank_Get_Response_ReturnDocumentsNull_NoTopN() throws IOException { + public void testInfer_Rerank_ReturnDocumentsNull_NoTopN() throws IOException { String responseJson = """ { "usage": { @@ -370,16 +382,15 @@ public void testInfer_Rerank_Get_Response_ReturnDocumentsNull_NoTopN() throws IO try (var service = new MixedbreadService(senderFactory, createWithEmptySettings(threadPool), mockClusterServiceEmpty())) { webServer.enqueue(new MockResponse().setResponseCode(200).setBody(responseJson)); - var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, "secret", null, null); - model.setURI(getUrl(webServer)); + var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, API_KEY, null, null, getUrl(webServer)); PlainActionFuture listener = new PlainActionFuture<>(); service.infer( model, - "query", + QUERY_VALUE, null, null, INPUT, - false, + STREAM, new HashMap<>(), null, InferenceAction.Request.DEFAULT_TIMEOUT, @@ -406,12 
+417,12 @@ public void testInfer_Rerank_Get_Response_ReturnDocumentsNull_NoTopN() throws IO assertThat(webServer.requests().getFirst().getHeader(HttpHeaders.AUTHORIZATION), equalTo("Bearer secret")); var requestMap = entityAsMap(webServer.requests().getFirst().getBody()); - assertThat(requestMap, is(Map.of("query", "query", "input", INPUT, "model", MODEL_NAME_VALUE))); + assertThat(requestMap, is(Map.of("query", QUERY_VALUE, "input", INPUT, "model", MODEL_NAME_VALUE))); } } - public void testInfer_Rerank_Get_Response_ReturnDocuments_TopN() throws IOException { + public void testInfer_Rerank_ReturnDocuments_TopN() throws IOException { String responseJson = """ { "usage": { @@ -449,16 +460,15 @@ public void testInfer_Rerank_Get_Response_ReturnDocuments_TopN() throws IOExcept try (var service = new MixedbreadService(senderFactory, createWithEmptySettings(threadPool), mockClusterServiceEmpty())) { webServer.enqueue(new MockResponse().setResponseCode(200).setBody(responseJson)); - var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, "secret", 3, true); - model.setURI(getUrl(webServer)); + var model = MixedbreadRerankModelTests.createModel(MODEL_NAME_VALUE, API_KEY, TOP_N, RETURN_DOCUMENTS_TRUE, getUrl(webServer)); PlainActionFuture listener = new PlainActionFuture<>(); service.infer( model, - "query", + QUERY_VALUE, null, null, List.of("candidate1", "candidate2", "candidate3", "candidate4"), - false, + STREAM, new HashMap<>(), null, InferenceAction.Request.DEFAULT_TIMEOUT, @@ -490,13 +500,13 @@ public void testInfer_Rerank_Get_Response_ReturnDocuments_TopN() throws IOExcept is( Map.of( "query", - "query", + QUERY_VALUE, "input", List.of("candidate1", "candidate2", "candidate3", "candidate4"), "model", MODEL_NAME_VALUE, "return_input", - true, + RETURN_DOCUMENTS_TRUE, "top_k", 3 ) @@ -588,28 +598,6 @@ private static void assertCommonModelSettings( assertThat(model.apiKey().toString(), is(apiKey)); } - private Map getRequestConfigMap( - Map 
serviceSettings, - Map taskSettings, - Map secretSettings - ) { - var builtServiceSettings = new HashMap<>(); - builtServiceSettings.putAll(serviceSettings); - builtServiceSettings.putAll(secretSettings); - - return new HashMap<>( - Map.of(ModelConfigurations.SERVICE_SETTINGS, builtServiceSettings, ModelConfigurations.TASK_SETTINGS, taskSettings) - ); - } - - private Map getRequestConfigMap(Map serviceSettings, Map secretSettings) { - var builtServiceSettings = new HashMap<>(); - builtServiceSettings.putAll(serviceSettings); - builtServiceSettings.putAll(secretSettings); - - return new HashMap<>(Map.of(ModelConfigurations.SERVICE_SETTINGS, builtServiceSettings)); - } - private MixedbreadService createMixedbreadService() { return new MixedbreadService(mock(HttpRequestSender.Factory.class), createWithEmptySettings(threadPool), mockClusterServiceEmpty()); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreatorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreatorTests.java index e5157308dc4f8..d45bbb4a88d4b 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreatorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/action/MixedbreadActionCreatorTests.java @@ -11,12 +11,10 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.InferenceServiceResults; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.http.MockWebServer; import org.elasticsearch.threadpool.ThreadPool; -import org.elasticsearch.xpack.core.inference.action.InferenceAction; import 
org.elasticsearch.xpack.inference.external.action.ExecutableAction; import org.elasticsearch.xpack.inference.external.http.HttpClientManager; import org.elasticsearch.xpack.inference.external.http.HttpResult; @@ -24,13 +22,11 @@ import org.elasticsearch.xpack.inference.external.http.sender.Sender; import org.elasticsearch.xpack.inference.logging.ThrottlerManager; import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModelTests; -import org.hamcrest.MatcherAssert; import org.junit.After; import org.junit.Before; import java.io.IOException; import java.util.List; -import java.util.concurrent.TimeUnit; import static org.elasticsearch.xpack.inference.Utils.inferenceUtilityExecutors; import static org.elasticsearch.xpack.inference.Utils.mockClusterServiceEmpty; @@ -43,7 +39,6 @@ public class MixedbreadActionCreatorTests extends ESTestCase { private static final String EXPECTED_EXCEPTION = "Failed to send Mixedbread rerank request. Cause: failed"; - private static final TimeValue TIMEOUT = new TimeValue(30, TimeUnit.SECONDS); private static final QueryAndDocsInputs QUERY_AND_DOCS_INPUTS = new QueryAndDocsInputs( "popular name", List.of("Luke"), @@ -69,16 +64,6 @@ public void shutdown() throws IOException { webServer.close(); } - public void testExecute_ThrowsElasticsearchException() { - var sender = mock(Sender.class); - doThrow(new ElasticsearchException("failed")).when(sender).send(any(), any(), any(), any()); - - var action = createAction("model", "secret", null, null, sender); - ElasticsearchException thrownException = executeActionWithException(action); - - MatcherAssert.assertThat(thrownException.getMessage(), is("failed")); - } - public void testExecute_ThrowsElasticsearchException_WhenSenderOnFailureIsCalled() { var sender = mock(Sender.class); @@ -92,23 +77,7 @@ public void testExecute_ThrowsElasticsearchException_WhenSenderOnFailureIsCalled var action = createAction("model", "secret", null, null, sender); ElasticsearchException 
thrownException = executeActionWithException(action); - MatcherAssert.assertThat(thrownException.getMessage(), is(EXPECTED_EXCEPTION)); - } - - public void testExecute_ThrowsElasticsearchException_WhenSenderOnFailureIsCalled_WhenUrlIsNull() { - var sender = mock(Sender.class); - - doAnswer(invocation -> { - ActionListener listener = invocation.getArgument(3); - listener.onFailure(new IllegalStateException("failed")); - - return Void.TYPE; - }).when(sender).send(any(), any(), any(), any()); - - var action = createAction("model", "secret", null, null, sender); - ElasticsearchException thrownException = executeActionWithException(action); - - MatcherAssert.assertThat(thrownException.getMessage(), is(EXPECTED_EXCEPTION)); + ESTestCase.assertThat(thrownException.getMessage(), is(EXPECTED_EXCEPTION)); } public void testExecute_ThrowsException() { @@ -118,28 +87,18 @@ public void testExecute_ThrowsException() { var action = createAction("model", "secret", null, null, sender); ElasticsearchException thrownException = executeActionWithException(action); - MatcherAssert.assertThat(thrownException.getMessage(), is(EXPECTED_EXCEPTION)); - } - - public void testExecute_ThrowsExceptionWithNullUrl() { - var sender = mock(Sender.class); - doThrow(new IllegalArgumentException("failed")).when(sender).send(any(), any(), any(), any()); - - var action = createAction("model", "secret", null, null, sender); - var thrownException = executeActionWithException(action); - - MatcherAssert.assertThat(thrownException.getMessage(), is(EXPECTED_EXCEPTION)); + ESTestCase.assertThat(thrownException.getMessage(), is(EXPECTED_EXCEPTION)); } private static ElasticsearchException executeActionWithException(ExecutableAction action) { PlainActionFuture listener = new PlainActionFuture<>(); - action.execute(QUERY_AND_DOCS_INPUTS, InferenceAction.Request.DEFAULT_TIMEOUT, listener); - return expectThrows(ElasticsearchException.class, () -> listener.actionGet(TIMEOUT)); + 
action.execute(QUERY_AND_DOCS_INPUTS, ESTestCase.TEST_REQUEST_TIMEOUT, listener); + return expectThrows(ElasticsearchException.class, () -> listener.actionGet(ESTestCase.TEST_REQUEST_TIMEOUT)); } private ExecutableAction createAction(String modelName, String apiKey, Integer topN, Boolean returnDocuments, Sender sender) { var actionCreator = new MixedbreadActionCreator(sender, createWithEmptySettings(threadPool)); - var model = MixedbreadRerankModelTests.createModel(modelName, apiKey, topN, returnDocuments); + var model = MixedbreadRerankModelTests.createModel(modelName, apiKey, topN, returnDocuments, null); return actionCreator.create(model, null); } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntityTests.java index 55e1d77af4a2c..d7ac7a9331a26 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntityTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestEntityTests.java @@ -12,6 +12,7 @@ import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.inference.services.mixedbread.request.rerank.MixedbreadRerankRequestEntity; import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettings; import java.io.IOException; diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java index 
9179617b097d0..977dc533de21d 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/request/MixedbreadRerankRequestTests.java @@ -12,6 +12,7 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.inference.services.mixedbread.request.rerank.MixedbreadRerankRequest; import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModelTests; import java.io.IOException; @@ -19,6 +20,7 @@ import java.util.Map; import static org.elasticsearch.xpack.inference.external.http.Utils.entityAsMap; +import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadServiceTests.RETURN_DOCUMENTS_FALSE; import static org.hamcrest.Matchers.aMapWithSize; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; @@ -26,10 +28,10 @@ public class MixedbreadRerankRequestTests extends ESTestCase { - private static final String API_KEY = "api_key"; - public static final String INPUT = "input"; - public static final String MODEL = "model"; - public static final String QUERY = "query"; + private static final String API_KEY = "secret"; + public static final String INPUT = "input_value"; + public static final String MODEL = "model_id_value"; + public static final String QUERY = "query_value"; public static final int TOP_K = 1; public void testCreateRequest_WithMinimalFieldsSet() throws IOException { @@ -42,13 +44,13 @@ public void testCreateRequest_WithMinimalFieldsSet() throws IOException { } public void testCreateRequest_WithAllFieldsSet() throws IOException { - var request = createRequest(QUERY, INPUT, MODEL, TOP_K, Boolean.FALSE); + var request = createRequest(QUERY, INPUT, MODEL, TOP_K, RETURN_DOCUMENTS_FALSE); Map requestMap = 
getEntityAsMap(request); assertThat(requestMap, aMapWithSize(5)); assertThat(requestMap.get("input"), is(List.of(INPUT))); assertThat(requestMap.get("query"), is(QUERY)); assertThat(requestMap.get("top_k"), is(TOP_K)); - assertThat(requestMap.get("return_input"), is(Boolean.FALSE)); + assertThat(requestMap.get("return_input"), is(RETURN_DOCUMENTS_FALSE)); assertThat(requestMap.get("model"), is(MODEL)); } @@ -66,7 +68,7 @@ private static MixedbreadRerankRequest createRequest( @Nullable Integer topN, @Nullable Boolean returnDocuments ) { - var rerankModel = MixedbreadRerankModelTests.createModel(modelId, API_KEY, null, null); + var rerankModel = MixedbreadRerankModelTests.createModel(modelId, API_KEY, null, null, null); return new MixedbreadRerankRequest(query, List.of(input), returnDocuments, topN, rerankModel); } @@ -76,6 +78,7 @@ private Map getEntityAsMap(MixedbreadRerankRequest request) thro var httpPost = (HttpPost) httpRequest.httpRequestBase(); assertThat(httpPost.getLastHeader(HttpHeaders.CONTENT_TYPE).getValue(), is(XContentType.JSON.mediaType())); assertThat(httpPost.getLastHeader(HttpHeaders.AUTHORIZATION).getValue(), is("Bearer " + API_KEY)); + assertThat(httpPost.getURI(), is(sameInstance(request.getURI()))); return entityAsMap(httpPost.getEntity().getContent()); } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java index d873f595e3556..cbdeabcf97f42 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java @@ -20,31 +20,29 @@ public class MixedbreadRerankModelTests extends ESTestCase { - public static final 
String DEFAULT_URL = "https://api.mixedbread.com/v1/rerank"; + public static final String DEFAULT_URL = "https://api.mixedbread.com/v1/reranking"; public static final String CUSTOM_URL = "https://custom.url.com/v1/rerank"; - public static final String MODEL_ID = "model_id"; + public static final String MODEL_ID = "model_id_value"; public static final String API_KEY = "secret"; public void testConstructor_usesDefaultUrlWhenNull() { - var model = createModel(MODEL_ID, API_KEY, null, null); - model.setURI(DEFAULT_URL); + var model = createModel(MODEL_ID, API_KEY, null, null, null); assertThat(model.uri().toString(), is(DEFAULT_URL)); } public void testConstructor_usesUrlWhenSpecified() { - var model = createModel(MODEL_ID, API_KEY, null, null); - model.setURI(CUSTOM_URL); + var model = createModel(MODEL_ID, API_KEY, null, null, CUSTOM_URL); assertThat(model.uri().toString(), is(CUSTOM_URL)); } public void testOf_DoesNotOverrideAndModelRemainsEqual_WhenSettingsAreEmpty() { - var model = createModel(MODEL_ID, API_KEY, 10, true); + var model = createModel(MODEL_ID, API_KEY, 10, true, CUSTOM_URL); var overriddenModel = MixedbreadRerankModel.of(model, Map.of()); assertThat(overriddenModel, sameInstance(model)); } public void testOf_DoesNotOverrideAndModelRemainsEqual_WhenSettingsAreNull() { - var model = createModel(MODEL_ID, API_KEY, 10, true); + var model = createModel(MODEL_ID, API_KEY, 10, true, CUSTOM_URL); var overriddenModel = MixedbreadRerankModel.of(model, null); assertThat(overriddenModel, sameInstance(model)); } @@ -52,25 +50,25 @@ public void testOf_DoesNotOverrideAndModelRemainsEqual_WhenSettingsAreNull() { public void testOf_DoesNotOverrideAndModelRemainsEqual_WhenSettingsAreEqual() { var topN = randomNonNegativeInt(); var returnDocuments = randomBoolean(); - var model = createModel(MODEL_ID, API_KEY, topN, returnDocuments); + var model = createModel(MODEL_ID, API_KEY, topN, returnDocuments, CUSTOM_URL); var overriddenModel = MixedbreadRerankModel.of(model, 
getTaskSettingsMap(topN, returnDocuments)); assertThat(overriddenModel, sameInstance(model)); } public void testOf_SetsTopN_FromRequestTaskSettings_OverridingStoredTaskSettings() { - var model = createModel(MODEL_ID, API_KEY, 15, null); + var model = createModel(MODEL_ID, API_KEY, 15, null, CUSTOM_URL); var topNFromRequest = 10; var overriddenModel = MixedbreadRerankModel.of(model, getTaskSettingsMap(topNFromRequest, null)); - var expectedModel = createModel(MODEL_ID, API_KEY, topNFromRequest, null); + var expectedModel = createModel(MODEL_ID, API_KEY, topNFromRequest, null, CUSTOM_URL); assertThat(overriddenModel, is(expectedModel)); } public void testOf_SetsReturnDocuments_FromRequestTaskSettings() { var topN = 15; - var model = createModel(MODEL_ID, API_KEY, topN, true); + var model = createModel(MODEL_ID, API_KEY, topN, true, CUSTOM_URL); var returnDocumentsFromRequest = false; var overriddenModel = MixedbreadRerankModel.of(model, getTaskSettingsMap(null, returnDocumentsFromRequest)); - var expectedModel = createModel(MODEL_ID, API_KEY, topN, returnDocumentsFromRequest); + var expectedModel = createModel(MODEL_ID, API_KEY, topN, returnDocumentsFromRequest, CUSTOM_URL); assertThat(overriddenModel, is(expectedModel)); } @@ -78,13 +76,15 @@ public static MixedbreadRerankModel createModel( String model, String apiKey, @Nullable Integer topN, - @Nullable Boolean returnDocuments + @Nullable Boolean returnDocuments, + String uri ) { return new MixedbreadRerankModel( model, new MixedbreadRerankServiceSettings(model, null, null), new MixedbreadRerankTaskSettings(topN, returnDocuments), - new DefaultSecretSettings(new SecureString(apiKey.toCharArray())) + new DefaultSecretSettings(new SecureString(apiKey.toCharArray())), + uri ); } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettingsTests.java 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettingsTests.java index 6449c42281c1e..c4461130892eb 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettingsTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettingsTests.java @@ -16,9 +16,15 @@ import java.util.HashMap; import java.util.Map; +import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadServiceTests.RETURN_DOCUMENTS_FALSE; +import static org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadServiceTests.RETURN_DOCUMENTS_TRUE; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.sameInstance; public class MixedbreadRerankTaskSettingsTests extends AbstractWireSerializingTestCase { + private static final int TOP_N = 7; + private static final int TOP_N_UPDATE_VALUE = 8; public static MixedbreadRerankTaskSettings createRandom() { var returnDocuments = randomOptionalBoolean(); @@ -30,13 +36,13 @@ public static MixedbreadRerankTaskSettings createRandom() { public void testFromMap_WithValidValues_ReturnsSettings() { Map taskMap = Map.of( MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, - true, - MixedbreadRerankTaskSettings.TOP_N_DOCS_ONLY, - 5 + RETURN_DOCUMENTS_TRUE, + MixedbreadRerankTaskSettings.TOP_N, + TOP_N ); var settings = MixedbreadRerankTaskSettings.fromMap(new HashMap<>(taskMap)); assertTrue(settings.getReturnDocuments()); - assertEquals(5, settings.getTopNDocumentsOnly().intValue()); + assertEquals(TOP_N, settings.getTopNDocumentsOnly().intValue()); } public void testFromMap_WithNullValues_ReturnsSettingsWithNulls() { @@ -49,8 +55,8 @@ public void testFromMap_WithInvalidReturnDocuments_ThrowsValidationException() { Map taskMap = Map.of( 
MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, "invalid", - MixedbreadRerankTaskSettings.TOP_N_DOCS_ONLY, - 5 + MixedbreadRerankTaskSettings.TOP_N, + TOP_N ); var thrownException = expectThrows(ValidationException.class, () -> MixedbreadRerankTaskSettings.fromMap(new HashMap<>(taskMap))); assertThat(thrownException.getMessage(), containsString("field [return_documents] is not of the expected type")); @@ -59,8 +65,8 @@ public void testFromMap_WithInvalidReturnDocuments_ThrowsValidationException() { public void testFromMap_WithInvalidTopNDocsOnly_ThrowsValidationException() { Map taskMap = Map.of( MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, - true, - MixedbreadRerankTaskSettings.TOP_N_DOCS_ONLY, + RETURN_DOCUMENTS_TRUE, + MixedbreadRerankTaskSettings.TOP_N, "invalid" ); var thrownException = expectThrows(ValidationException.class, () -> MixedbreadRerankTaskSettings.fromMap(new HashMap<>(taskMap))); @@ -68,38 +74,38 @@ public void testFromMap_WithInvalidTopNDocsOnly_ThrowsValidationException() { } public void testUpdatedTaskSettings_WithEmptyMap_ReturnsSameSettings() { - var initialSettings = new MixedbreadRerankTaskSettings(5, true); - MixedbreadRerankTaskSettings updatedSettings = (MixedbreadRerankTaskSettings) initialSettings.updatedTaskSettings(Map.of()); - assertEquals(initialSettings, updatedSettings); + var initialSettings = new MixedbreadRerankTaskSettings(TOP_N, RETURN_DOCUMENTS_TRUE); + MixedbreadRerankTaskSettings updatedSettings = initialSettings.updatedTaskSettings(Map.of()); + assertThat(initialSettings, is(sameInstance(updatedSettings))); } public void testUpdatedTaskSettings_WithNewReturnDocuments_ReturnsUpdatedSettings() { - var initialSettings = new MixedbreadRerankTaskSettings(5, true); - Map newSettings = Map.of(MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, false); - MixedbreadRerankTaskSettings updatedSettings = (MixedbreadRerankTaskSettings) initialSettings.updatedTaskSettings(newSettings); + var initialSettings = new 
MixedbreadRerankTaskSettings(TOP_N, RETURN_DOCUMENTS_TRUE); + Map newSettings = Map.of(MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, RETURN_DOCUMENTS_FALSE); + MixedbreadRerankTaskSettings updatedSettings = initialSettings.updatedTaskSettings(newSettings); assertFalse(updatedSettings.getReturnDocuments()); assertEquals(initialSettings.getTopNDocumentsOnly(), updatedSettings.getTopNDocumentsOnly()); } public void testUpdatedTaskSettings_WithNewTopNDocsOnly_ReturnsUpdatedSettings() { - var initialSettings = new MixedbreadRerankTaskSettings(5, true); - Map newSettings = Map.of(MixedbreadRerankTaskSettings.TOP_N_DOCS_ONLY, 7); - MixedbreadRerankTaskSettings updatedSettings = (MixedbreadRerankTaskSettings) initialSettings.updatedTaskSettings(newSettings); - assertEquals(7, updatedSettings.getTopNDocumentsOnly().intValue()); + var initialSettings = new MixedbreadRerankTaskSettings(TOP_N, RETURN_DOCUMENTS_TRUE); + Map newSettings = Map.of(MixedbreadRerankTaskSettings.TOP_N, TOP_N_UPDATE_VALUE); + MixedbreadRerankTaskSettings updatedSettings = initialSettings.updatedTaskSettings(newSettings); + assertEquals(TOP_N_UPDATE_VALUE, updatedSettings.getTopNDocumentsOnly().intValue()); assertEquals(initialSettings.getReturnDocuments(), updatedSettings.getReturnDocuments()); } public void testUpdatedTaskSettings_WithMultipleNewValues_ReturnsUpdatedSettings() { - var initialSettings = new MixedbreadRerankTaskSettings(5, true); + var initialSettings = new MixedbreadRerankTaskSettings(TOP_N, RETURN_DOCUMENTS_TRUE); Map newSettings = Map.of( MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, - false, - MixedbreadRerankTaskSettings.TOP_N_DOCS_ONLY, - 7 + RETURN_DOCUMENTS_FALSE, + MixedbreadRerankTaskSettings.TOP_N, + TOP_N_UPDATE_VALUE ); - MixedbreadRerankTaskSettings updatedSettings = (MixedbreadRerankTaskSettings) initialSettings.updatedTaskSettings(newSettings); + MixedbreadRerankTaskSettings updatedSettings = initialSettings.updatedTaskSettings(newSettings); 
assertFalse(updatedSettings.getReturnDocuments()); - assertEquals(7, updatedSettings.getTopNDocumentsOnly().intValue()); + assertEquals(TOP_N_UPDATE_VALUE, updatedSettings.getTopNDocumentsOnly().intValue()); } @Override @@ -127,7 +133,7 @@ public static Map getTaskSettingsMap(@Nullable Integer topNDocum var map = new HashMap(); if (topNDocumentsOnly != null) { - map.put(MixedbreadRerankTaskSettings.TOP_N_DOCS_ONLY, topNDocumentsOnly); + map.put(MixedbreadRerankTaskSettings.TOP_N, topNDocumentsOnly); } if (returnDocuments != null) { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntityTests.java index 7ba93ed95852f..2d21fdc5e4b97 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntityTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntityTests.java @@ -47,9 +47,7 @@ public void testResponseLiteral() throws IOException { ); assertThat(parsedResults, instanceOf(RankedDocsResults.class)); - for (int i = 0; i < ((RankedDocsResults) parsedResults).getRankedDocs().size(); i++) { - assertEquals(((RankedDocsResults) parsedResults).getRankedDocs().get(i).index(), RESPONSE_LITERAL_DOCS.get(i).index()); - } + assertThat(((RankedDocsResults) parsedResults).getRankedDocs(), is(RESPONSE_LITERAL_DOCS)); } public void testResponseLiteralWithDocumentsAsString() throws IOException { From a20144fe07f21845a9a5e12485239078c59bef34 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 2 Feb 2026 16:24:54 +0000 Subject: [PATCH 22/48] [CI] Update transport version definitions --- .../definitions/referable/ml_inference_mixedbread_added.csv | 2 +- 
server/src/main/resources/transport/upper_bounds/9.4.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv b/server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv index 8988ac60abe1d..f43c057011643 100644 --- a/server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv +++ b/server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv @@ -1 +1 @@ -9270000 +9271000 diff --git a/server/src/main/resources/transport/upper_bounds/9.4.csv b/server/src/main/resources/transport/upper_bounds/9.4.csv index 68595a37842bb..7c45b6f2fdf11 100644 --- a/server/src/main/resources/transport/upper_bounds/9.4.csv +++ b/server/src/main/resources/transport/upper_bounds/9.4.csv @@ -1 +1 @@ -bulk_by_scroll_request_includes_relocation_field,9270000 +ml_inference_mixedbread_added,9271000 From 9b1a82c831b6d4c04e3b5bcf8a8e80e77788db6f Mon Sep 17 00:00:00 2001 From: Evgenii Kazannik Date: Mon, 2 Feb 2026 20:45:36 +0100 Subject: [PATCH 23/48] Address comments --- .../services/mixedbread/MixedbreadModel.java | 2 +- .../mixedbread/MixedbreadService.java | 16 +- .../services/mixedbread/MixedbreadUtils.java | 2 +- .../rerank/MixedbreadRerankRequest.java | 1 - .../rerank/MixedbreadRerankRequestEntity.java | 4 +- .../MixedbreadRerankServiceSettings.java | 14 +- .../rerank/MixedbreadRerankTaskSettings.java | 32 +-- .../MixedbreadRerankResponseEntity.java | 2 + .../mixedbread/MixedbreadServiceTests.java | 270 ++++++++++++++++-- .../rerank/MixedbreadRerankModelTests.java | 4 +- .../MixedbreadRerankServiceSettingsTests.java | 17 +- .../MixedbreadRerankTaskSettingsTests.java | 18 +- 12 files changed, 299 insertions(+), 83 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java index 75ffb0be44dde..5fb51c966552a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java @@ -30,7 +30,7 @@ public abstract class MixedbreadModel extends RateLimitGroupingModel { private final SecureString apiKey; private final RateLimitSettings rateLimitServiceSettings; - protected URI uri; + private final URI uri; public MixedbreadModel( ModelConfigurations configurations, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java index 5558f134e4af6..5fa7b15768662 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java @@ -62,16 +62,20 @@ public class MixedbreadService extends SenderService implements RerankingInferen private static final EnumSet SUPPORTED_TASK_TYPES = EnumSet.of(TaskType.RERANK); private static final Map RERANKERS_INPUT_SIZE = Map.of( - "mixedbread-ai/mxbai-rerank-xsmall-v1", - 512, - "mixedbread-ai/mxbai-rerank-base-v1", - 512, - "mixedbread-ai/mxbai-rerank-large-v1", - 512 // Windows size. 
// The v1 models: 512 // The v2 models: at least 8k // https://www.mixedbread.com/docs/models/reranking/mxbai-rerank-large-v1 + + // rerankerWindowSize() method returns the size in words, not in tokens, so we'll need to translate + // tokens to words by multiplying by 0.75 and rounding down + // https://github.com/elastic/elasticsearch/pull/132169 + "mixedbread-ai/mxbai-rerank-xsmall-v1", + 300, + "mixedbread-ai/mxbai-rerank-base-v1", + 300, + "mixedbread-ai/mxbai-rerank-large-v1", + 300 ); /** diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadUtils.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadUtils.java index a8de119445ac1..977cc2eeb0203 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadUtils.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadUtils.java @@ -36,7 +36,7 @@ public final class MixedbreadUtils { /** * TransportVersion indicating when Mixedbread features were added. */ - public static final TransportVersion INFERENCE_MIXEDBREAD_ADDED = TransportVersion.fromName("ml_inference_mixedbread_added"); + public static final TransportVersion INFERENCE_MIXEDBREAD_ADDED = TransportVersion.fromName("inference_mixedbread_added"); /** * Checks if the given TransportVersion supports Mixedbread features. 
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequest.java index 592721220561d..8d52457afd0d4 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequest.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequest.java @@ -42,7 +42,6 @@ public MixedbreadRerankRequest( this.query = Objects.requireNonNull(query); this.returnDocuments = returnDocuments; this.topN = topN; - model.getTaskSettings(); this.model = Objects.requireNonNull(model); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequestEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequestEntity.java index 6bec061c135b4..55616712b2cfd 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequestEntity.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequestEntity.java @@ -43,8 +43,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (topN != null) { builder.field(MixedbreadUtils.TOP_K_FIELD, topN); - } else if (taskSettings.getTopNDocumentsOnly() != null) { - builder.field(MixedbreadUtils.TOP_K_FIELD, taskSettings.getTopNDocumentsOnly()); + } else if (taskSettings.getTopN() != null) { + builder.field(MixedbreadUtils.TOP_K_FIELD, taskSettings.getTopN()); } if (returnDocuments != null) { diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java index 1bd04827a89b6..354d528a25210 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java @@ -26,26 +26,22 @@ import java.util.Objects; import static org.elasticsearch.xpack.inference.services.ServiceFields.MODEL_ID; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalInteger; import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredString; public class MixedbreadRerankServiceSettings extends FilteredXContentObject implements ServiceSettings { public static final String NAME = "mixedbread_rerank_service_settings"; - public static final String WINDOWS_SIZE = "windows_size"; /** * Free subscription tier 100 req / min * Rate Limiting. 
*/ public static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(100); - private static final Integer DEFAULT_WINDOWS_SIZE = 8000; public static MixedbreadRerankServiceSettings fromMap(Map map, ConfigurationParseContext context) { ValidationException validationException = new ValidationException(); String model = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException); - Integer windowsSize = extractOptionalInteger(map, WINDOWS_SIZE, ModelConfigurations.SERVICE_SETTINGS, validationException); RateLimitSettings rateLimitSettings = RateLimitSettings.of( map, DEFAULT_RATE_LIMIT_SETTINGS, @@ -58,24 +54,21 @@ public static MixedbreadRerankServiceSettings fromMap(Map map, C throw validationException; } - return new MixedbreadRerankServiceSettings(model, rateLimitSettings, windowsSize); + return new MixedbreadRerankServiceSettings(model, rateLimitSettings); } private final String model; private final RateLimitSettings rateLimitSettings; - private final Integer windowsSize; - public MixedbreadRerankServiceSettings(String model, @Nullable RateLimitSettings rateLimitSettings, @Nullable Integer windowsSize) { + public MixedbreadRerankServiceSettings(String model, @Nullable RateLimitSettings rateLimitSettings) { this.model = model; this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS); - this.windowsSize = Objects.requireNonNullElse(windowsSize, DEFAULT_WINDOWS_SIZE); } public MixedbreadRerankServiceSettings(StreamInput in) throws IOException { this.model = in.readOptionalString(); this.rateLimitSettings = new RateLimitSettings(in); - this.windowsSize = in.readOptionalInt(); } @Override @@ -111,8 +104,6 @@ protected XContentBuilder toXContentFragmentOfExposedFields(XContentBuilder buil rateLimitSettings.toXContent(builder, params); - builder.field(WINDOWS_SIZE, windowsSize); - return builder; } @@ -131,7 +122,6 @@ public XContentBuilder toXContent(XContentBuilder 
builder, Params params) throws public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(model); rateLimitSettings.writeTo(out); - out.writeOptionalInt(windowsSize); } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java index 3d31ade1944a1..a9c337e02c6a7 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettings.java @@ -40,17 +40,17 @@ public static MixedbreadRerankTaskSettings fromMap(Map map) { } Boolean returnDocuments = extractOptionalBoolean(map, RETURN_DOCUMENTS, validationException); - Integer topNDocumentsOnly = extractOptionalPositiveInteger(map, TOP_N, ModelConfigurations.TASK_SETTINGS, validationException); + Integer topN = extractOptionalPositiveInteger(map, TOP_N, ModelConfigurations.TASK_SETTINGS, validationException); if (validationException.validationErrors().isEmpty() == false) { throw validationException; } - if (returnDocuments == null && topNDocumentsOnly == null) { + if (returnDocuments == null && topN == null) { return EMPTY_SETTINGS; } - return new MixedbreadRerankTaskSettings(topNDocumentsOnly, returnDocuments); + return new MixedbreadRerankTaskSettings(topN, returnDocuments); } /** @@ -69,37 +69,35 @@ public static MixedbreadRerankTaskSettings of( return originalSettings; } return new MixedbreadRerankTaskSettings( - requestTaskSettings.getTopNDocumentsOnly() != null - ? requestTaskSettings.getTopNDocumentsOnly() - : originalSettings.getTopNDocumentsOnly(), + requestTaskSettings.getTopN() != null ? 
requestTaskSettings.getTopN() : originalSettings.getTopN(), requestTaskSettings.getReturnDocuments() != null ? requestTaskSettings.getReturnDocuments() : originalSettings.getReturnDocuments() ); } - private final Integer topNDocumentsOnly; + private final Integer topN; private final Boolean returnDocuments; public MixedbreadRerankTaskSettings(StreamInput in) throws IOException { this(in.readOptionalVInt(), in.readOptionalBoolean()); } - public MixedbreadRerankTaskSettings(@Nullable Integer topNDocumentsOnly, @Nullable Boolean doReturnDocuments) { - this.topNDocumentsOnly = topNDocumentsOnly; + public MixedbreadRerankTaskSettings(@Nullable Integer topN, @Nullable Boolean doReturnDocuments) { + this.topN = topN; this.returnDocuments = doReturnDocuments; } @Override public boolean isEmpty() { - return topNDocumentsOnly == null && returnDocuments == null; + return topN == null && returnDocuments == null; } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); - if (topNDocumentsOnly != null) { - builder.field(TOP_N, topNDocumentsOnly); + if (topN != null) { + builder.field(TOP_N, topN); } if (returnDocuments != null) { builder.field(RETURN_DOCUMENTS, returnDocuments); @@ -126,7 +124,7 @@ public boolean supportsVersion(TransportVersion version) { @Override public void writeTo(StreamOutput out) throws IOException { - out.writeOptionalVInt(topNDocumentsOnly); + out.writeOptionalVInt(topN); out.writeOptionalBoolean(returnDocuments); } @@ -135,16 +133,16 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; MixedbreadRerankTaskSettings that = (MixedbreadRerankTaskSettings) o; - return Objects.equals(returnDocuments, that.returnDocuments) && Objects.equals(topNDocumentsOnly, that.topNDocumentsOnly); + return Objects.equals(returnDocuments, that.returnDocuments) && Objects.equals(topN, that.topN); } @Override public int hashCode() { - 
return Objects.hash(returnDocuments, topNDocumentsOnly); + return Objects.hash(returnDocuments, topN); } - public Integer getTopNDocumentsOnly() { - return topNDocumentsOnly; + public Integer getTopN() { + return topN; } public Boolean getReturnDocuments() { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java index f18c4ff156df3..deffd05aa2563 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java @@ -32,6 +32,7 @@ public class MixedbreadRerankResponseEntity { * Parses the Mixedbread rerank response. * For a request like: + *
      *{
      *   "model": "mixedbread-ai/mxbai-rerank-xsmall-v1",
      *   "query": "Who is the author of To Kill a Mockingbird?",
@@ -46,6 +47,7 @@ public class MixedbreadRerankResponseEntity {
      *   "top_k": 3,
      *   "return_input": false
      * }
+     * 
*

* The response will look like (without whitespace): *

diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
index fa6887c2a488b..7433989185c76 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
@@ -9,7 +9,10 @@
 
 import org.apache.http.HttpHeaders;
 import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.ElasticsearchStatusException;
+import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.support.PlainActionFuture;
+import org.elasticsearch.common.ValidationException;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.settings.Settings;
@@ -26,7 +29,6 @@
 import org.elasticsearch.xcontent.ToXContent;
 import org.elasticsearch.xcontent.XContentType;
 import org.elasticsearch.xpack.core.inference.action.InferenceAction;
-import org.elasticsearch.xpack.inference.Utils;
 import org.elasticsearch.xpack.inference.external.http.HttpClientManager;
 import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender;
 import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSenderTests;
@@ -35,10 +37,10 @@
 import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModel;
 import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModelTests;
 import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankServiceSettings;
-import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankServiceSettingsTests;
 import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettings;
-import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettingsTests;
 import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
+import org.hamcrest.CoreMatchers;
+import org.hamcrest.Matchers;
 import org.junit.After;
 import org.junit.Before;
 
@@ -48,14 +50,17 @@
 import java.util.Map;
 
 import static org.elasticsearch.common.xcontent.XContentHelper.toXContent;
+import static org.elasticsearch.inference.TaskType.RERANK;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertToXContentEquivalent;
 import static org.elasticsearch.xpack.inference.Utils.getPersistedConfigMap;
+import static org.elasticsearch.xpack.inference.Utils.getRequestConfigMap;
 import static org.elasticsearch.xpack.inference.Utils.inferenceUtilityExecutors;
 import static org.elasticsearch.xpack.inference.Utils.mockClusterServiceEmpty;
 import static org.elasticsearch.xpack.inference.external.http.Utils.entityAsMap;
 import static org.elasticsearch.xpack.inference.external.http.Utils.getUrl;
 import static org.elasticsearch.xpack.inference.services.ServiceComponentsTests.createWithEmptySettings;
-import static org.elasticsearch.xpack.inference.services.jinaai.JinaAIServiceSettingsTests.getServiceSettingsMap;
+import static org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankServiceSettingsTests.getServiceSettingsMap;
+import static org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettingsTests.getTaskSettingsMap;
 import static org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettingsTests.getSecretSettingsMap;
 import static org.hamcrest.CoreMatchers.is;
 import static org.hamcrest.Matchers.containsString;
@@ -65,17 +70,21 @@
 import static org.mockito.Mockito.mock;
 
 public class MixedbreadServiceTests extends InferenceServiceTestCase {
+    public static final String UNKNOWN_SETTINGS_EXCEPTION =
+        "Configuration contains settings [{extra_key=value}] unknown to the [mixedbread] service";
     public static final Boolean RETURN_DOCUMENTS_TRUE = true;
     public static final Boolean RETURN_DOCUMENTS_FALSE = false;
     public static final String DEFAULT_RERANK_URL = "https://api.mixedbread.com/v1/reranking";
 
-    private static final String INFERENCE_ENTITY_ID_VALUE = "id";
+    private static final String INFERENCE_ID_VALUE = "id";
     private static final String MODEL_NAME_VALUE = "modelName";
     private static final String API_KEY = "secret";
     private static final String QUERY_VALUE = "query";
     private static final Integer TOP_N = 3;
+    private static final Integer REQUESTS_PER_MINUTE = 3;
     private static final Boolean STREAM = false;
     private static final List<String> INPUT = List.of("candidate1", "candidate2", "candidate3"); // rerank candidates
+
     private final MockWebServer webServer = new MockWebServer();
     private ThreadPool threadPool;
     private HttpClientManager clientManager;
@@ -105,11 +114,11 @@ public void testParseRequestConfig_createsRerankModel() throws IOException {
             var modelListener = new PlainActionFuture();
 
             service.parseRequestConfig(
-                INFERENCE_ENTITY_ID_VALUE,
+                INFERENCE_ID_VALUE,
                 TaskType.RERANK,
-                Utils.getRequestConfigMap(
-                    MixedbreadRerankServiceSettingsTests.getServiceSettingsMap(modelName, requestsPerMinute),
-                    MixedbreadRerankTaskSettingsTests.getTaskSettingsMap(topN, returnDocuments),
+                getRequestConfigMap(
+                    getServiceSettingsMap(modelName, requestsPerMinute),
+                    getTaskSettingsMap(topN, returnDocuments),
                     getSecretSettingsMap(apiKey)
                 ),
                 modelListener
@@ -133,13 +142,9 @@ public void testParseRequestConfig_onlyRequiredSettings_createsRerankModel() thr
             var modelListener = new PlainActionFuture();
 
             service.parseRequestConfig(
-                INFERENCE_ENTITY_ID_VALUE,
+                INFERENCE_ID_VALUE,
                 TaskType.RERANK,
-                Utils.getRequestConfigMap(
-                    MixedbreadRerankServiceSettingsTests.getServiceSettingsMap(modelName),
-                    Map.of(),
-                    getSecretSettingsMap(apiKey)
-                ),
+                getRequestConfigMap(getServiceSettingsMap(modelName), Map.of(), getSecretSettingsMap(apiKey)),
                 modelListener
             );
 
@@ -163,13 +168,13 @@ public void testParsePersistedConfigWithSecrets_createsRerankModel() throws IOEx
             var apiKey = randomAlphanumericOfLength(8);
 
             var persistedConfig = getPersistedConfigMap(
-                MixedbreadRerankServiceSettingsTests.getServiceSettingsMap(modelName, requestsPerMinute),
-                MixedbreadRerankTaskSettingsTests.getTaskSettingsMap(topN, returnDocuments),
+                getServiceSettingsMap(modelName, requestsPerMinute),
+                getTaskSettingsMap(topN, returnDocuments),
                 getSecretSettingsMap(apiKey)
             );
 
             var model = service.parsePersistedConfigWithSecrets(
-                INFERENCE_ENTITY_ID_VALUE,
+                INFERENCE_ID_VALUE,
                 TaskType.RERANK,
                 persistedConfig.config(),
                 persistedConfig.secrets()
@@ -193,7 +198,7 @@ public void testParsePersistedConfigWithSecrets_onlyRequiredSettings_createsRera
             var persistedConfig = getPersistedConfigMap(getServiceSettingsMap(modelName, null), Map.of(), getSecretSettingsMap(apiKey));
 
             var model = service.parsePersistedConfigWithSecrets(
-                INFERENCE_ENTITY_ID_VALUE,
+                INFERENCE_ID_VALUE,
                 TaskType.RERANK,
                 persistedConfig.config(),
                 persistedConfig.secrets()
@@ -217,12 +222,12 @@ public void testParsePersistedConfig_createsRerankModel() throws IOException {
             var returnDocuments = randomBoolean();
 
             var persistedConfig = getPersistedConfigMap(
-                MixedbreadRerankServiceSettingsTests.getServiceSettingsMap(modelName, requestsPerMinute),
-                MixedbreadRerankTaskSettingsTests.getTaskSettingsMap(topN, returnDocuments),
+                getServiceSettingsMap(modelName, requestsPerMinute),
+                getTaskSettingsMap(topN, returnDocuments),
                 null
             );
 
-            var model = service.parsePersistedConfig(INFERENCE_ENTITY_ID_VALUE, TaskType.RERANK, persistedConfig.config());
+            var model = service.parsePersistedConfig(INFERENCE_ID_VALUE, TaskType.RERANK, persistedConfig.config());
 
             assertRerankModelSettings(
                 model,
@@ -234,6 +239,227 @@ public void testParsePersistedConfig_createsRerankModel() throws IOException {
         }
     }
 
+    public void testParseRequestConfig_NoModelId_ThrowsException() throws IOException {
+        try (var service = createMixedbreadService()) {
+            ActionListener<Model> modelListener = ActionListener.wrap(
+                model -> fail("Expected exception, but got model: " + model), // a parsed model means validation was skipped
+                exception -> {
+                    assertThat(exception, instanceOf(ValidationException.class));
+                    assertThat(
+                        exception.getMessage(),
+                        Matchers.is("Validation Failed: 1: [service_settings] does not contain the required setting [model_id];")
+                    );
+                }
+            );
+
+            service.parseRequestConfig(
+                INFERENCE_ID_VALUE,
+                TaskType.RERANK,
+                getRequestConfigMap(
+                    getServiceSettingsMap(null, REQUESTS_PER_MINUTE), // null model name: the required setting is absent
+                    getTaskSettingsMap(TOP_N, RETURN_DOCUMENTS_TRUE),
+                    getSecretSettingsMap(API_KEY)
+                ),
+                modelListener
+            );
+        }
+    }
+
+    public void testParseRequestConfig_ThrowsWhenAnExtraKeyExistsInRerankSecretSettingsMap() throws IOException {
+        try (var service = createMixedbreadService()) {
+            var secretSettings = getSecretSettingsMap(API_KEY);
+            secretSettings.put("extra_key", "value"); // unrecognized key placed in the secret settings
+
+            var config = getRequestConfigMap(
+                getServiceSettingsMap(MODEL_NAME_VALUE, REQUESTS_PER_MINUTE),
+                getTaskSettingsMap(TOP_N, RETURN_DOCUMENTS_TRUE),
+                secretSettings
+            );
+
+            assertThrowsExceptionWhenAnExtraKeyExists(service, config); // helper checks for UNKNOWN_SETTINGS_EXCEPTION
+        }
+    }
+
+    public void testParseRequestConfig_ThrowsWhenAnExtraKeyExistsInRerankServiceSettingsMap() throws IOException {
+        try (var service = createMixedbreadService()) {
+            var serviceSettings = getServiceSettingsMap(MODEL_NAME_VALUE, REQUESTS_PER_MINUTE);
+            serviceSettings.put("extra_key", "value"); // unrecognized key placed in the service settings
+
+            var config = getRequestConfigMap(
+                serviceSettings,
+                getTaskSettingsMap(TOP_N, RETURN_DOCUMENTS_TRUE),
+                getSecretSettingsMap(API_KEY)
+            );
+
+            assertThrowsExceptionWhenAnExtraKeyExists(service, config); // helper checks for UNKNOWN_SETTINGS_EXCEPTION
+        }
+    }
+
+    public void testParseRequestConfig_ThrowsWhenAnExtraKeyExistsInRerankTaskSettingsMap() throws IOException {
+        try (var service = createMixedbreadService()) {
+            var taskSettings = getTaskSettingsMap(TOP_N, RETURN_DOCUMENTS_TRUE);
+            taskSettings.put("extra_key", "value"); // unrecognized key placed in the task settings
+
+            var config = getRequestConfigMap(
+                getServiceSettingsMap(MODEL_NAME_VALUE, REQUESTS_PER_MINUTE),
+                taskSettings,
+                getSecretSettingsMap(API_KEY)
+            );
+
+            assertThrowsExceptionWhenAnExtraKeyExists(service, config); // helper checks for UNKNOWN_SETTINGS_EXCEPTION
+        }
+    }
+
+    private static void assertThrowsExceptionWhenAnExtraKeyExists(MixedbreadService service, Map<String, Object> config) {
+        ActionListener<Model> modelVerificationListener = ActionListener.wrap(
+            model -> fail("Expected exception, but got model: " + model), // success means the extra key was accepted
+            exception -> {
+                assertThat(exception, instanceOf(ElasticsearchStatusException.class));
+                assertThat(exception.getMessage(), Matchers.is(UNKNOWN_SETTINGS_EXCEPTION));
+            }
+        );
+
+        service.parseRequestConfig(INFERENCE_ID_VALUE, RERANK, config, modelVerificationListener);
+    }
+
+    public void testParsePersistedConfig_DoesNotThrowWhenAnExtraKeyExistsInSecretsSettings() throws IOException {
+        var serviceSettings = getServiceSettingsMap(MODEL_NAME_VALUE, REQUESTS_PER_MINUTE);
+        var taskSettings = getTaskSettingsMap(TOP_N, RETURN_DOCUMENTS_TRUE);
+        var secretSettings = getSecretSettingsMap(API_KEY);
+
+        try (var service = createMixedbreadService()) {
+            secretSettings.put("extra_key", "value"); // unrecognized key placed in the secret settings
+
+            var persistedConfig = getPersistedConfigMap(serviceSettings, taskSettings, secretSettings);
+
+            var model = service.parsePersistedConfig(INFERENCE_ID_VALUE, RERANK, persistedConfig.config()); // secrets() not passed
+
+            assertThat(model, CoreMatchers.instanceOf(MixedbreadRerankModel.class));
+
+            var rerankModel = (MixedbreadRerankModel) model;
+            assertThat(rerankModel.getServiceSettings().modelId(), is(MODEL_NAME_VALUE));
+            assertThat(rerankModel.getTaskSettings(), is(new MixedbreadRerankTaskSettings(TOP_N, RETURN_DOCUMENTS_TRUE)));
+        }
+    }
+
+    public void testParsePersistedConfig_DoesNotThrowWhenAnExtraKeyExistsInServiceSettings() throws IOException {
+        var serviceSettings = getServiceSettingsMap(MODEL_NAME_VALUE, REQUESTS_PER_MINUTE);
+        var taskSettings = getTaskSettingsMap(TOP_N, RETURN_DOCUMENTS_TRUE);
+        var secretSettings = getSecretSettingsMap(API_KEY);
+
+        try (var service = createMixedbreadService()) {
+            serviceSettings.put("extra_key", "value"); // unrecognized key placed in the service settings
+
+            var persistedConfig = getPersistedConfigMap(serviceSettings, taskSettings, secretSettings);
+
+            var model = service.parsePersistedConfig(INFERENCE_ID_VALUE, RERANK, persistedConfig.config()); // must not throw
+
+            assertThat(model, CoreMatchers.instanceOf(MixedbreadRerankModel.class));
+
+            var rerankModel = (MixedbreadRerankModel) model;
+            assertThat(rerankModel.getServiceSettings().modelId(), is(MODEL_NAME_VALUE));
+            assertThat(rerankModel.getTaskSettings(), is(new MixedbreadRerankTaskSettings(TOP_N, RETURN_DOCUMENTS_TRUE)));
+        }
+    }
+
+    public void testParsePersistedConfig_DoesNotThrowWhenAnExtraKeyExistsInTaskSettings() throws IOException {
+        var serviceSettings = getServiceSettingsMap(MODEL_NAME_VALUE, REQUESTS_PER_MINUTE);
+        var taskSettings = getTaskSettingsMap(TOP_N, RETURN_DOCUMENTS_TRUE);
+        var secretSettings = getSecretSettingsMap(API_KEY);
+
+        try (var service = createMixedbreadService()) {
+            taskSettings.put("extra_key", "value"); // unrecognized key placed in the task settings
+
+            var persistedConfig = getPersistedConfigMap(serviceSettings, taskSettings, secretSettings);
+
+            var model = service.parsePersistedConfig(INFERENCE_ID_VALUE, RERANK, persistedConfig.config()); // must not throw
+
+            assertThat(model, CoreMatchers.instanceOf(MixedbreadRerankModel.class));
+
+            var rerankModel = (MixedbreadRerankModel) model;
+            assertThat(rerankModel.getServiceSettings().modelId(), is(MODEL_NAME_VALUE));
+            assertThat(rerankModel.getTaskSettings(), is(new MixedbreadRerankTaskSettings(TOP_N, RETURN_DOCUMENTS_TRUE)));
+        }
+    }
+
+    public void testParsePersistedConfigWithSecrets_DoesNotThrowWhenAnExtraKeyExistsInSecretsSettings() throws IOException {
+        var serviceSettings = getServiceSettingsMap(MODEL_NAME_VALUE, REQUESTS_PER_MINUTE);
+        var taskSettings = getTaskSettingsMap(TOP_N, RETURN_DOCUMENTS_TRUE);
+        var secretSettings = getSecretSettingsMap(API_KEY);
+
+        try (var service = createMixedbreadService()) {
+            secretSettings.put("extra_key", "value"); // unrecognized key placed in the secret settings
+
+            var persistedConfig = getPersistedConfigMap(serviceSettings, taskSettings, secretSettings);
+
+            var model = service.parsePersistedConfigWithSecrets(
+                INFERENCE_ID_VALUE,
+                RERANK,
+                persistedConfig.config(),
+                persistedConfig.secrets()
+            );
+
+            assertThat(model, CoreMatchers.instanceOf(MixedbreadRerankModel.class));
+
+            var rerankModel = (MixedbreadRerankModel) model;
+            assertThat(rerankModel.getServiceSettings().modelId(), is(MODEL_NAME_VALUE));
+            assertThat(rerankModel.getTaskSettings(), is(new MixedbreadRerankTaskSettings(TOP_N, RETURN_DOCUMENTS_TRUE)));
+            assertThat(rerankModel.getSecretSettings().apiKey(), is(API_KEY)); // API key is still read correctly
+        }
+    }
+
+    public void testParsePersistedConfigWithSecrets_DoesNotThrowWhenAnExtraKeyExistsInServiceSettings() throws IOException {
+        var serviceSettings = getServiceSettingsMap(MODEL_NAME_VALUE, REQUESTS_PER_MINUTE);
+        var taskSettings = getTaskSettingsMap(TOP_N, RETURN_DOCUMENTS_TRUE);
+        var secretSettings = getSecretSettingsMap(API_KEY);
+
+        try (var service = createMixedbreadService()) {
+            serviceSettings.put("extra_key", "value");
+
+            var persistedConfig = getPersistedConfigMap(serviceSettings, taskSettings, secretSettings);
+
+            var model = service.parsePersistedConfigWithSecrets(
+                INFERENCE_ID_VALUE,
+                RERANK,
+                persistedConfig.config(),
+                persistedConfig.secrets()
+            );
+
+            assertThat(model, CoreMatchers.instanceOf(MixedbreadRerankModel.class));
+
+            var rerankModel = (MixedbreadRerankModel) model;
+            assertThat(rerankModel.getServiceSettings().modelId(), is(MODEL_NAME_VALUE));
+            assertThat(rerankModel.getTaskSettings(), is(new MixedbreadRerankTaskSettings(TOP_N, RETURN_DOCUMENTS_TRUE)));
+            assertThat(rerankModel.getSecretSettings().apiKey(), is(API_KEY));
+        }
+    }
+
+    public void testParsePersistedConfigWithSecrets_DoesNotThrowWhenAnExtraKeyExistsInTaskSettings() throws IOException {
+        var serviceSettings = getServiceSettingsMap(MODEL_NAME_VALUE, REQUESTS_PER_MINUTE);
+        var taskSettings = getTaskSettingsMap(TOP_N, RETURN_DOCUMENTS_TRUE);
+        var secretSettings = getSecretSettingsMap(API_KEY);
+
+        try (var service = createMixedbreadService()) {
+            taskSettings.put("extra_key", "value");
+
+            var persistedConfig = getPersistedConfigMap(serviceSettings, taskSettings, secretSettings);
+
+            var model = service.parsePersistedConfigWithSecrets(
+                INFERENCE_ID_VALUE,
+                RERANK,
+                persistedConfig.config(),
+                persistedConfig.secrets()
+            );
+
+            assertThat(model, CoreMatchers.instanceOf(MixedbreadRerankModel.class));
+
+            var rerankModel = (MixedbreadRerankModel) model;
+            assertThat(rerankModel.getServiceSettings().modelId(), is(MODEL_NAME_VALUE));
+            assertThat(rerankModel.getTaskSettings(), is(new MixedbreadRerankTaskSettings(TOP_N, RETURN_DOCUMENTS_TRUE)));
+            assertThat(rerankModel.getSecretSettings().apiKey(), is(API_KEY));
+        }
+    }
+
     public void testInfer_Rerank_UnauthorisedResponse() throws IOException {
         var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager);
 
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java
index cbdeabcf97f42..66fa58c414851 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelTests.java
@@ -77,11 +77,11 @@ public static MixedbreadRerankModel createModel(
         String apiKey,
         @Nullable Integer topN,
         @Nullable Boolean returnDocuments,
-        String uri
+        @Nullable String uri
     ) {
         return new MixedbreadRerankModel(
             model,
-            new MixedbreadRerankServiceSettings(model, null, null),
+            new MixedbreadRerankServiceSettings(model, null),
             new MixedbreadRerankTaskSettings(topN, returnDocuments),
             new DefaultSecretSettings(new SecureString(apiKey.toCharArray())),
             uri
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java
index 389bf52771e98..131e904779c68 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java
@@ -28,38 +28,35 @@
 public class MixedbreadRerankServiceSettingsTests extends AbstractWireSerializingTestCase<MixedbreadRerankServiceSettings> {
     private static final String MODEL = "model";
     private static final RateLimitSettings RATE_LIMIT = new RateLimitSettings(2);
-    private static final Integer WINDOWS_SIZE = 512;
 
     public static MixedbreadRerankServiceSettings createRandom() {
         return createRandom(randomFrom(new RateLimitSettings[] { null, RateLimitSettingsTests.createRandom() }));
     }
 
     public static MixedbreadRerankServiceSettings createRandom(@Nullable RateLimitSettings rateLimitSettings) {
-        return new MixedbreadRerankServiceSettings(randomAlphaOfLengthOrNull(10), rateLimitSettings, null);
+        return new MixedbreadRerankServiceSettings(randomAlphaOfLengthOrNull(10), rateLimitSettings);
     }
 
     public void testToXContent_WritesAllValues() throws IOException {
-        var serviceSettings = new MixedbreadRerankServiceSettings(MODEL, RATE_LIMIT, WINDOWS_SIZE);
+        var serviceSettings = new MixedbreadRerankServiceSettings(MODEL, RATE_LIMIT);
         assertThat(getXContentResult(serviceSettings), equalToIgnoringWhitespaceInJsonString("""
             {
                 "model_id":"model",
                 "rate_limit": {
                     "requests_per_minute": 2
-                },
-                "windows_size": 512
+                }
             }
             """));
     }
 
-    public void testToXContent_DoesNotWriteOptionalValues_DefaultRateLimit_And_DefaultWindowsSize() throws IOException {
-        var serviceSettings = new MixedbreadRerankServiceSettings(MODEL, null, null);
+    public void testToXContent_DoesNotWriteOptionalValues_DefaultRateLimit() throws IOException {
+        var serviceSettings = new MixedbreadRerankServiceSettings(MODEL, null);
         assertThat(getXContentResult(serviceSettings), equalToIgnoringWhitespaceInJsonString("""
             {
                 "model_id":"model",
                 "rate_limit": {
                     "requests_per_minute": 100
-                },
-                "windows_size": 8000
+                }
             }
             """));
     }
@@ -90,7 +87,7 @@ protected MixedbreadRerankServiceSettings mutateInstance(MixedbreadRerankService
             default -> throw new AssertionError("Illegal randomisation branch");
         }
 
-        return new MixedbreadRerankServiceSettings(modelId, rateLimitSettings, null);
+        return new MixedbreadRerankServiceSettings(modelId, rateLimitSettings);
     }
 
     public static Map getServiceSettingsMap(String model) {
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettingsTests.java
index c4461130892eb..4de364cf95f6b 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettingsTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankTaskSettingsTests.java
@@ -42,13 +42,13 @@ public void testFromMap_WithValidValues_ReturnsSettings() {
         );
         var settings = MixedbreadRerankTaskSettings.fromMap(new HashMap<>(taskMap));
         assertTrue(settings.getReturnDocuments());
-        assertEquals(TOP_N, settings.getTopNDocumentsOnly().intValue());
+        assertEquals(TOP_N, settings.getTopN().intValue());
     }
 
     public void testFromMap_WithNullValues_ReturnsSettingsWithNulls() {
         var settings = MixedbreadRerankTaskSettings.fromMap(Map.of());
         assertNull(settings.getReturnDocuments());
-        assertNull(settings.getTopNDocumentsOnly());
+        assertNull(settings.getTopN());
     }
 
     public void testFromMap_WithInvalidReturnDocuments_ThrowsValidationException() {
@@ -84,14 +84,14 @@ public void testUpdatedTaskSettings_WithNewReturnDocuments_ReturnsUpdatedSetting
         Map newSettings = Map.of(MixedbreadRerankTaskSettings.RETURN_DOCUMENTS, RETURN_DOCUMENTS_FALSE);
         MixedbreadRerankTaskSettings updatedSettings = initialSettings.updatedTaskSettings(newSettings);
         assertFalse(updatedSettings.getReturnDocuments());
-        assertEquals(initialSettings.getTopNDocumentsOnly(), updatedSettings.getTopNDocumentsOnly());
+        assertEquals(initialSettings.getTopN(), updatedSettings.getTopN());
     }
 
     public void testUpdatedTaskSettings_WithNewTopNDocsOnly_ReturnsUpdatedSettings() {
         var initialSettings = new MixedbreadRerankTaskSettings(TOP_N, RETURN_DOCUMENTS_TRUE);
         Map newSettings = Map.of(MixedbreadRerankTaskSettings.TOP_N, TOP_N_UPDATE_VALUE);
         MixedbreadRerankTaskSettings updatedSettings = initialSettings.updatedTaskSettings(newSettings);
-        assertEquals(TOP_N_UPDATE_VALUE, updatedSettings.getTopNDocumentsOnly().intValue());
+        assertEquals(TOP_N_UPDATE_VALUE, updatedSettings.getTopN().intValue());
         assertEquals(initialSettings.getReturnDocuments(), updatedSettings.getReturnDocuments());
     }
 
@@ -105,7 +105,7 @@ public void testUpdatedTaskSettings_WithMultipleNewValues_ReturnsUpdatedSettings
         );
         MixedbreadRerankTaskSettings updatedSettings = initialSettings.updatedTaskSettings(newSettings);
         assertFalse(updatedSettings.getReturnDocuments());
-        assertEquals(TOP_N_UPDATE_VALUE, updatedSettings.getTopNDocumentsOnly().intValue());
+        assertEquals(TOP_N_UPDATE_VALUE, updatedSettings.getTopN().intValue());
     }
 
     @Override
@@ -120,7 +120,7 @@ protected MixedbreadRerankTaskSettings createTestInstance() {
 
     @Override
     protected MixedbreadRerankTaskSettings mutateInstance(MixedbreadRerankTaskSettings instance) throws IOException {
-        var topNDocsOnly = instance.getTopNDocumentsOnly();
+        var topNDocsOnly = instance.getTopN();
         var returnDocuments = instance.getReturnDocuments();
         switch (randomInt(1)) {
             case 0 -> topNDocsOnly = randomValueOtherThan(topNDocsOnly, () -> randomFrom(randomIntBetween(1, 10), null));
@@ -129,11 +129,11 @@ protected MixedbreadRerankTaskSettings mutateInstance(MixedbreadRerankTaskSettin
         return new MixedbreadRerankTaskSettings(topNDocsOnly, returnDocuments);
     }
 
-    public static Map getTaskSettingsMap(@Nullable Integer topNDocumentsOnly, Boolean returnDocuments) {
+    public static Map getTaskSettingsMap(@Nullable Integer topN, Boolean returnDocuments) {
         var map = new HashMap();
 
-        if (topNDocumentsOnly != null) {
-            map.put(MixedbreadRerankTaskSettings.TOP_N, topNDocumentsOnly);
+        if (topN != null) {
+            map.put(MixedbreadRerankTaskSettings.TOP_N, topN);
         }
 
         if (returnDocuments != null) {

From 84aab0e2ec9ea97bf082e1e47ccfaa52ae3d12ae Mon Sep 17 00:00:00 2001
From: elasticsearchmachine 
Date: Tue, 3 Feb 2026 00:32:57 +0000
Subject: [PATCH 24/48] [CI] Update transport version definitions

---
 ...ence_mixedbread_added.csv => inference_mixedbread_added.csv} | 0
 server/src/main/resources/transport/upper_bounds/9.4.csv        | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename server/src/main/resources/transport/definitions/referable/{ml_inference_mixedbread_added.csv => inference_mixedbread_added.csv} (100%)

diff --git a/server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv b/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
similarity index 100%
rename from server/src/main/resources/transport/definitions/referable/ml_inference_mixedbread_added.csv
rename to server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
diff --git a/server/src/main/resources/transport/upper_bounds/9.4.csv b/server/src/main/resources/transport/upper_bounds/9.4.csv
index 7c45b6f2fdf11..474b4c42addd6 100644
--- a/server/src/main/resources/transport/upper_bounds/9.4.csv
+++ b/server/src/main/resources/transport/upper_bounds/9.4.csv
@@ -1 +1 @@
-ml_inference_mixedbread_added,9271000
+inference_mixedbread_added,9271000

From 64956c1efc4629dc5ca5c95c17633faa9c7d344c Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Tue, 3 Feb 2026 11:38:29 +0100
Subject: [PATCH 25/48] Adjust to a new model creation approach

---
 .../mixedbread/MixedbreadService.java         | 58 ++++++++++++++++---
 .../rerank/MixedbreadRerankModel.java         | 19 +++++-
 .../rerank/MixedbreadRerankModelCreator.java  | 43 ++++++++++++++
 3 files changed, 112 insertions(+), 8 deletions(-)
 create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelCreator.java

diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
index 5fa7b15768662..6b4b365c68b82 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
@@ -33,11 +33,12 @@
 import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs;
 import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;
 import org.elasticsearch.xpack.inference.services.ConfigurationParseContext;
+import org.elasticsearch.xpack.inference.services.ModelCreator;
 import org.elasticsearch.xpack.inference.services.SenderService;
 import org.elasticsearch.xpack.inference.services.ServiceComponents;
 import org.elasticsearch.xpack.inference.services.ServiceUtils;
 import org.elasticsearch.xpack.inference.services.mixedbread.action.MixedbreadActionCreator;
-import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModel;
+import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModelCreator;
 import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings;
 import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
 
@@ -83,6 +84,18 @@ public class MixedbreadService extends SenderService implements RerankingInferen
      */
     private static final int DEFAULT_RERANKER_INPUT_SIZE_WORDS = 8000;
 
+    private static final Map<TaskType, ModelCreator<? extends MixedbreadModel>> MODEL_CREATORS = Map.of(
+        TaskType.RERANK,
+        new MixedbreadRerankModelCreator()
+    );
+
+    /**
+     * Constructor for creating a {@link MixedbreadService} with the specified HTTP request sender factory and service components.
+     *
+     * @param factory the factory to create HTTP request senders
+     * @param serviceComponents the components required for the inference service
+     * @param context the context for the inference service factory
+     */
     public MixedbreadService(
         HttpRequestSender.Factory factory,
         ServiceComponents serviceComponents,
@@ -152,20 +165,40 @@ private MixedbreadModel parsePersistedConfigWithSecrets(
         );
     }
 
-    private static MixedbreadModel createModel(
-        String inferenceEntityId,
+    /**
+     * Creates a {@link MixedbreadModel} based on the provided parameters.
+     *
+     * @param inferenceId the unique identifier for the inference entity
+     * @param taskType the type of task this model is designed for
+     * @param serviceSettings the settings for the inference service
+     * @param taskSettings the task-specific settings, if applicable
+     * @param chunkingSettings the settings for chunking, if applicable
+     * @param secretSettings the secret settings for the model, such as API keys or tokens
+     * @param context the context for parsing configuration settings
+     * @return a new instance of {@link MixedbreadModel} based on the provided parameters
+     */
+    protected MixedbreadModel createModel(
+        String inferenceId,
         TaskType taskType,
         Map serviceSettings,
         Map taskSettings,
         ChunkingSettings chunkingSettings,
-        @Nullable Map secretSettings,
+        Map secretSettings,
         ConfigurationParseContext context
     ) {
         if (taskType != TaskType.RERANK) {
-            throw createInvalidTaskTypeException(inferenceEntityId, NAME, taskType, context);
+            throw createInvalidTaskTypeException(inferenceId, NAME, taskType, context);
         }
-
-        return new MixedbreadRerankModel(inferenceEntityId, serviceSettings, taskSettings, secretSettings, context);
+        return retrieveModelCreatorFromMapOrThrow(MODEL_CREATORS, inferenceId, taskType, NAME, context).createFromMaps(
+            inferenceId,
+            taskType,
+            NAME,
+            serviceSettings,
+            taskSettings,
+            chunkingSettings,
+            secretSettings,
+            context
+        );
     }
 
     @Override
@@ -191,6 +224,17 @@ public MixedbreadModel parsePersistedConfigWithSecrets(
         );
     }
 
+    @Override
+    public Model buildModelFromConfigAndSecrets(ModelConfigurations config, ModelSecrets secrets) {
+        return retrieveModelCreatorFromMapOrThrow(
+            MODEL_CREATORS,
+            config.getInferenceEntityId(),
+            config.getTaskType(),
+            config.getService(),
+            ConfigurationParseContext.PERSISTENT
+        ).createFromModelConfigurationsAndSecrets(config, secrets);
+    }
+
     @Override
     public MixedbreadModel parsePersistedConfig(String inferenceEntityId, TaskType taskType, Map config) {
         Map serviceSettingsMap = ServiceUtils.removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS);
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java
index 0d3fa6dd3516a..98ff4c8bee183 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java
@@ -19,6 +19,7 @@
 import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadUtils;
 import org.elasticsearch.xpack.inference.services.mixedbread.action.MixedbreadActionVisitor;
 import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings;
+import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
 
 import java.util.Map;
 import java.util.Objects;
@@ -73,7 +74,23 @@ public MixedbreadRerankModel(
         );
     }
 
-    private MixedbreadRerankModel(MixedbreadRerankModel model, MixedbreadRerankTaskSettings taskSettings) {
+    /**
+     * Constructor for creating a {@link MixedbreadRerankModel} from model configurations and secrets.
+     *
+     * @param modelConfigurations the configurations for the model
+     * @param modelSecrets the secret settings for the model
+     */
+    public MixedbreadRerankModel(ModelConfigurations modelConfigurations, ModelSecrets modelSecrets) {
+        super(
+            modelConfigurations,
+            modelSecrets,
+            (DefaultSecretSettings) modelSecrets.getSecretSettings(),
+            (RateLimitSettings) modelConfigurations.getServiceSettings(),
+            null
+        );
+    }
+
+    public MixedbreadRerankModel(MixedbreadRerankModel model, MixedbreadRerankTaskSettings taskSettings) {
         super(model, taskSettings);
     }
 
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelCreator.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelCreator.java
new file mode 100644
index 0000000000000..3f931e21a307c
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModelCreator.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.mixedbread.rerank;
+
+import org.elasticsearch.core.Nullable;
+import org.elasticsearch.inference.ChunkingSettings;
+import org.elasticsearch.inference.ModelConfigurations;
+import org.elasticsearch.inference.ModelSecrets;
+import org.elasticsearch.inference.TaskType;
+import org.elasticsearch.xpack.inference.services.ConfigurationParseContext;
+import org.elasticsearch.xpack.inference.services.ModelCreator;
+
+import java.util.Map;
+
+/**
+ * Creates {@link MixedbreadRerankModel} instances from config maps
+ * or {@link ModelConfigurations} and {@link ModelSecrets} objects.
+ */
+public class MixedbreadRerankModelCreator implements ModelCreator<MixedbreadRerankModel> {
+    @Override
+    public MixedbreadRerankModel createFromMaps(
+        String inferenceId,
+        TaskType taskType,
+        String service,
+        Map serviceSettings,
+        @Nullable Map taskSettings,
+        @Nullable ChunkingSettings chunkingSettings,
+        @Nullable Map secretSettings,
+        ConfigurationParseContext context
+    ) {
+        return new MixedbreadRerankModel(inferenceId, serviceSettings, taskSettings, secretSettings, context);
+    }
+
+    @Override
+    public MixedbreadRerankModel createFromModelConfigurationsAndSecrets(ModelConfigurations config, ModelSecrets secrets) {
+        return new MixedbreadRerankModel(config, secrets);
+    }
+}

From abb457d794805e96e946444947caccc35a48a33f Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Tue, 3 Feb 2026 16:59:05 +0100
Subject: [PATCH 26/48] Address comments

---
 .../services/mixedbread/MixedbreadModel.java  | 25 ++++++-----------
 .../mixedbread/MixedbreadService.java         |  6 +---
 .../rerank/MixedbreadRerankModel.java         | 16 +++--------
 .../MixedbreadRerankServiceSettings.java      | 28 +++++++++----------
 .../mixedbread/MixedbreadServiceTests.java    |  2 +-
 5 files changed, 28 insertions(+), 49 deletions(-)

diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java
index 5fb51c966552a..e04cfababebdd 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java
@@ -16,12 +16,12 @@
 import org.elasticsearch.xpack.inference.services.RateLimitGroupingModel;
 import org.elasticsearch.xpack.inference.services.ServiceUtils;
 import org.elasticsearch.xpack.inference.services.mixedbread.action.MixedbreadActionVisitor;
+import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankServiceSettings;
 import org.elasticsearch.xpack.inference.services.settings.ApiKeySecrets;
 import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
 
 import java.net.URI;
 import java.util.Map;
-import java.util.Objects;
 
 /**
  * Abstract class representing a Mixedbread model for inference.
@@ -29,19 +29,11 @@
  */
 public abstract class MixedbreadModel extends RateLimitGroupingModel {
     private final SecureString apiKey;
-    private final RateLimitSettings rateLimitServiceSettings;
     private final URI uri;
 
-    public MixedbreadModel(
-        ModelConfigurations configurations,
-        ModelSecrets secrets,
-        @Nullable ApiKeySecrets apiKeySecrets,
-        RateLimitSettings rateLimitServiceSettings,
-        URI uri
-    ) {
+    public MixedbreadModel(ModelConfigurations configurations, ModelSecrets secrets, @Nullable ApiKeySecrets apiKeySecrets, URI uri) {
         super(configurations, secrets);
 
-        this.rateLimitServiceSettings = Objects.requireNonNull(rateLimitServiceSettings);
         apiKey = ServiceUtils.apiKey(apiKeySecrets);
         this.uri = uri;
     }
@@ -49,7 +41,6 @@ public MixedbreadModel(
     protected MixedbreadModel(MixedbreadModel model, TaskSettings taskSettings) {
         super(model, taskSettings);
 
-        rateLimitServiceSettings = model.rateLimitServiceSettings();
         apiKey = model.apiKey();
         uri = model.uri();
     }
@@ -58,18 +49,20 @@ public SecureString apiKey() {
         return apiKey;
     }
 
-    public RateLimitSettings rateLimitServiceSettings() {
-        return rateLimitServiceSettings;
-    }
-
     public abstract ExecutableAction accept(MixedbreadActionVisitor creator, Map taskSettings);
 
     public URI uri() {
         return uri;
     }
 
+    @Override
     public RateLimitSettings rateLimitSettings() {
-        return rateLimitServiceSettings;
+        return getServiceSettings().rateLimitSettings();
+    }
+
+    @Override
+    public MixedbreadRerankServiceSettings getServiceSettings() {
+        return (MixedbreadRerankServiceSettings) super.getServiceSettings();
     }
 
     public int rateLimitGroupingHash() {
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
index 6b4b365c68b82..8ef0050dff63e 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
@@ -49,7 +49,6 @@
 
 import static org.elasticsearch.xpack.inference.services.ServiceFields.MODEL_ID;
 import static org.elasticsearch.xpack.inference.services.ServiceUtils.createInvalidModelException;
-import static org.elasticsearch.xpack.inference.services.ServiceUtils.createInvalidTaskTypeException;
 import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwUnsupportedUnifiedCompletionOperation;
 
 /**
@@ -183,12 +182,9 @@ protected MixedbreadModel createModel(
         Map serviceSettings,
         Map taskSettings,
         ChunkingSettings chunkingSettings,
-        Map secretSettings,
+        @Nullable Map secretSettings,
         ConfigurationParseContext context
     ) {
-        if (taskType != TaskType.RERANK) {
-            throw createInvalidTaskTypeException(inferenceId, NAME, taskType, context);
-        }
         return retrieveModelCreatorFromMapOrThrow(MODEL_CREATORS, inferenceId, taskType, NAME, context).createFromMaps(
             inferenceId,
             taskType,
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java
index 98ff4c8bee183..96c754a5ae703 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java
@@ -19,7 +19,6 @@
 import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadUtils;
 import org.elasticsearch.xpack.inference.services.mixedbread.action.MixedbreadActionVisitor;
 import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings;
-import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
 
 import java.util.Map;
 import java.util.Objects;
@@ -53,17 +52,16 @@ public MixedbreadRerankModel(
 
     // should only be used for testing
     MixedbreadRerankModel(
-        String modelId,
+        String inferenceId,
         MixedbreadRerankServiceSettings serviceSettings,
         MixedbreadRerankTaskSettings taskSettings,
         @Nullable DefaultSecretSettings secretSettings,
         @Nullable String uri
     ) {
         super(
-            new ModelConfigurations(modelId, TaskType.RERANK, MixedbreadService.NAME, serviceSettings, taskSettings),
+            new ModelConfigurations(inferenceId, TaskType.RERANK, MixedbreadService.NAME, serviceSettings, taskSettings),
             new ModelSecrets(secretSettings),
             secretSettings,
-            serviceSettings.rateLimitSettings(),
             Objects.requireNonNullElse(
                 ServiceUtils.createOptionalUri(uri),
                 buildUri(
@@ -81,13 +79,7 @@ public MixedbreadRerankModel(
      * @param modelSecrets the secret settings for the model
      */
     public MixedbreadRerankModel(ModelConfigurations modelConfigurations, ModelSecrets modelSecrets) {
-        super(
-            modelConfigurations,
-            modelSecrets,
-            (DefaultSecretSettings) modelSecrets.getSecretSettings(),
-            (RateLimitSettings) modelConfigurations.getServiceSettings(),
-            null
-        );
+        super(modelConfigurations, modelSecrets, (DefaultSecretSettings) modelSecrets.getSecretSettings(), null);
     }
 
     public MixedbreadRerankModel(MixedbreadRerankModel model, MixedbreadRerankTaskSettings taskSettings) {
@@ -96,7 +88,7 @@ public MixedbreadRerankModel(MixedbreadRerankModel model, MixedbreadRerankTaskSe
 
     @Override
     public MixedbreadRerankServiceSettings getServiceSettings() {
-        return (MixedbreadRerankServiceSettings) super.getServiceSettings();
+        return super.getServiceSettings();
     }
 
     @Override
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java
index 354d528a25210..9a2e1bcf4657b 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java
@@ -41,7 +41,7 @@ public class MixedbreadRerankServiceSettings extends FilteredXContentObject impl
     public static MixedbreadRerankServiceSettings fromMap(Map map, ConfigurationParseContext context) {
         ValidationException validationException = new ValidationException();
 
-        String model = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException);
+        String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException);
         RateLimitSettings rateLimitSettings = RateLimitSettings.of(
             map,
             DEFAULT_RATE_LIMIT_SETTINGS,
@@ -50,30 +50,28 @@ public static MixedbreadRerankServiceSettings fromMap(Map map, C
             context
         );
 
-        if (validationException.validationErrors().isEmpty() == false) {
-            throw validationException;
-        }
+        validationException.throwIfValidationErrorsExist();
 
-        return new MixedbreadRerankServiceSettings(model, rateLimitSettings);
+        return new MixedbreadRerankServiceSettings(modelId, rateLimitSettings);
     }
 
-    private final String model;
+    private final String modelId;
 
     private final RateLimitSettings rateLimitSettings;
 
-    public MixedbreadRerankServiceSettings(String model, @Nullable RateLimitSettings rateLimitSettings) {
-        this.model = model;
+    public MixedbreadRerankServiceSettings(String modelId, @Nullable RateLimitSettings rateLimitSettings) {
+        this.modelId = modelId;
         this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS);
     }
 
     public MixedbreadRerankServiceSettings(StreamInput in) throws IOException {
-        this.model = in.readOptionalString();
+        this.modelId = in.readOptionalString();
         this.rateLimitSettings = new RateLimitSettings(in);
     }
 
     @Override
     public String modelId() {
-        return model;
+        return modelId;
     }
 
     public RateLimitSettings rateLimitSettings() {
@@ -98,8 +96,8 @@ public boolean supportsVersion(TransportVersion version) {
 
     @Override
     protected XContentBuilder toXContentFragmentOfExposedFields(XContentBuilder builder, Params params) throws IOException {
-        if (model != null) {
-            builder.field(MODEL_ID, model);
+        if (modelId != null) {
+            builder.field(MODEL_ID, modelId);
         }
 
         rateLimitSettings.toXContent(builder, params);
@@ -120,7 +118,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
 
     @Override
     public void writeTo(StreamOutput out) throws IOException {
-        out.writeOptionalString(model);
+        out.writeOptionalString(modelId);
         rateLimitSettings.writeTo(out);
     }
 
@@ -129,11 +127,11 @@ public boolean equals(Object object) {
         if (this == object) return true;
         if (object == null || getClass() != object.getClass()) return false;
         MixedbreadRerankServiceSettings that = (MixedbreadRerankServiceSettings) object;
-        return Objects.equals(model, that.modelId()) && Objects.equals(rateLimitSettings, that.rateLimitSettings());
+        return Objects.equals(modelId, that.modelId()) && Objects.equals(rateLimitSettings, that.rateLimitSettings());
     }
 
     @Override
     public int hashCode() {
-        return Objects.hash(model, rateLimitSettings);
+        return Objects.hash(modelId, rateLimitSettings);
     }
 }
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
index 7433989185c76..6806951021b19 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
@@ -819,7 +819,7 @@ private static  void assertCommonModelSettings(
     ) {
         assertThat(model.uri().toString(), is(url));
         assertThat(model.getServiceSettings().modelId(), is(modelName));
-        assertThat(model.rateLimitServiceSettings(), is(rateLimitSettings));
+        assertThat(model.rateLimitSettings(), is(rateLimitSettings));
 
         assertThat(model.apiKey().toString(), is(apiKey));
     }

From c324b3f820d5c2d6650b8fa2651a940d424112e6 Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Wed, 4 Feb 2026 09:51:29 +0100
Subject: [PATCH 27/48] Use API key from SecretSettings

---
 .../services/mixedbread/MixedbreadModel.java  | 20 ++++++++-----------
 .../rerank/MixedbreadRerankRequest.java       |  2 +-
 .../rerank/MixedbreadRerankModel.java         |  2 +-
 .../mixedbread/MixedbreadServiceTests.java    | 12 ++++++++---
 4 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java
index e04cfababebdd..4275e84610cc7 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadModel.java
@@ -7,48 +7,39 @@
 
 package org.elasticsearch.xpack.inference.services.mixedbread;
 
-import org.elasticsearch.common.settings.SecureString;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.inference.ModelConfigurations;
 import org.elasticsearch.inference.ModelSecrets;
 import org.elasticsearch.inference.TaskSettings;
 import org.elasticsearch.xpack.inference.external.action.ExecutableAction;
 import org.elasticsearch.xpack.inference.services.RateLimitGroupingModel;
-import org.elasticsearch.xpack.inference.services.ServiceUtils;
 import org.elasticsearch.xpack.inference.services.mixedbread.action.MixedbreadActionVisitor;
 import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankServiceSettings;
 import org.elasticsearch.xpack.inference.services.settings.ApiKeySecrets;
+import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings;
 import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
 
 import java.net.URI;
 import java.util.Map;
+import java.util.Objects;
 
 /**
  * Abstract class representing a Mixedbread model for inference.
  * This class extends RateLimitGroupingModel and provides common functionality for Mixedbread models.
  */
 public abstract class MixedbreadModel extends RateLimitGroupingModel {
-    private final SecureString apiKey;
     private final URI uri;
 
     public MixedbreadModel(ModelConfigurations configurations, ModelSecrets secrets, @Nullable ApiKeySecrets apiKeySecrets, URI uri) {
         super(configurations, secrets);
-
-        apiKey = ServiceUtils.apiKey(apiKeySecrets);
         this.uri = uri;
     }
 
     protected MixedbreadModel(MixedbreadModel model, TaskSettings taskSettings) {
         super(model, taskSettings);
-
-        apiKey = model.apiKey();
         uri = model.uri();
     }
 
-    public SecureString apiKey() {
-        return apiKey;
-    }
-
     public abstract ExecutableAction accept(MixedbreadActionVisitor creator, Map taskSettings);
 
     public URI uri() {
@@ -65,7 +56,12 @@ public MixedbreadRerankServiceSettings getServiceSettings() {
         return (MixedbreadRerankServiceSettings) super.getServiceSettings();
     }
 
+    @Override
+    public DefaultSecretSettings getSecretSettings() {
+        return (DefaultSecretSettings) super.getSecretSettings();
+    }
+
     public int rateLimitGroupingHash() {
-        return apiKey().hashCode();
+        return Objects.hash(getServiceSettings().modelId(), uri, getSecretSettings());
     }
 }
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequest.java
index 8d52457afd0d4..5bc17ee35d859 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequest.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/request/rerank/MixedbreadRerankRequest.java
@@ -63,7 +63,7 @@ public HttpRequest createHttpRequest() {
         httpPost.setEntity(byteEntity);
 
         httpPost.setHeader(HttpHeaders.CONTENT_TYPE, XContentType.JSON.mediaType());
-        httpPost.setHeader(createAuthBearerHeader(model.apiKey()));
+        httpPost.setHeader(createAuthBearerHeader(model.getSecretSettings().apiKey()));
 
         return new HttpRequest(httpPost, getInferenceEntityId());
     }
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java
index 96c754a5ae703..a29b522bf9262 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankModel.java
@@ -98,7 +98,7 @@ public MixedbreadRerankTaskSettings getTaskSettings() {
 
     @Override
     public DefaultSecretSettings getSecretSettings() {
-        return (DefaultSecretSettings) super.getSecretSettings();
+        return super.getSecretSettings();
     }
 
     /**
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
index 6806951021b19..b0cd2b6c5b1f3 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
@@ -124,8 +124,11 @@ public void testParseRequestConfig_createsRerankModel() throws IOException {
                 modelListener
             );
 
+            var rerankModel = (MixedbreadRerankModel) modelListener.actionGet();
+
+            assertThat(rerankModel.getSecretSettings().apiKey().toString(), is(apiKey));
             assertRerankModelSettings(
-                modelListener.actionGet(),
+                rerankModel,
                 modelName,
                 new RateLimitSettings(requestsPerMinute),
                 apiKey,
@@ -148,6 +151,9 @@ public void testParseRequestConfig_onlyRequiredSettings_createsRerankModel() thr
                 modelListener
             );
 
+            var rerankModel = (MixedbreadRerankModel) modelListener.actionGet();
+
+            assertThat(rerankModel.getSecretSettings().apiKey().toString(), is(apiKey));
             assertRerankModelSettings(
                 modelListener.actionGet(),
                 modelName,
@@ -180,6 +186,7 @@ public void testParsePersistedConfigWithSecrets_createsRerankModel() throws IOEx
                 persistedConfig.secrets()
             );
 
+            assertThat(model.getSecretSettings().apiKey().toString(), is(apiKey));
             assertRerankModelSettings(
                 model,
                 modelName,
@@ -204,6 +211,7 @@ public void testParsePersistedConfigWithSecrets_onlyRequiredSettings_createsRera
                 persistedConfig.secrets()
             );
 
+            assertThat(model.getSecretSettings().apiKey().toString(), is(apiKey));
             assertRerankModelSettings(
                 model,
                 modelName,
@@ -820,8 +828,6 @@ private static  void assertCommonModelSettings(
         assertThat(model.uri().toString(), is(url));
         assertThat(model.getServiceSettings().modelId(), is(modelName));
         assertThat(model.rateLimitSettings(), is(rateLimitSettings));
-
-        assertThat(model.apiKey().toString(), is(apiKey));
     }
 
     private MixedbreadService createMixedbreadService() {

From 9e0e298f3bebc81e6b6091360589c2fa785aad13 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine 
Date: Wed, 4 Feb 2026 14:49:02 +0000
Subject: [PATCH 28/48] [CI] Update transport version definitions

---
 .../definitions/referable/inference_mixedbread_added.csv        | 2 +-
 server/src/main/resources/transport/upper_bounds/9.4.csv        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv b/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
index f43c057011643..8f5d4af46d0c6 100644
--- a/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
+++ b/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
@@ -1 +1 @@
-9271000
+9273000
diff --git a/server/src/main/resources/transport/upper_bounds/9.4.csv b/server/src/main/resources/transport/upper_bounds/9.4.csv
index e4f5404a43155..f342938b1709c 100644
--- a/server/src/main/resources/transport/upper_bounds/9.4.csv
+++ b/server/src/main/resources/transport/upper_bounds/9.4.csv
@@ -1 +1 @@
-index_limit_exceeded_exception,9272000
+inference_mixedbread_added,9273000

From e807531d17550b6d863e749d6fcd893643d80830 Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Fri, 6 Feb 2026 16:57:12 +0100
Subject: [PATCH 29/48] Change area from ML to Inference

---
 docs/changelog/140477.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/changelog/140477.yaml b/docs/changelog/140477.yaml
index 66de374507108..c9112ce326c32 100644
--- a/docs/changelog/140477.yaml
+++ b/docs/changelog/140477.yaml
@@ -1,5 +1,5 @@
 pr: 140477
 summary: "[Inference API] Add Mixedbread Rerank support to the Inference Plugin"
-area: Machine Learning
+area: Inference
 type: enhancement
 issues: []

From 29240ab8240bb2c91895a95fbaefe06ff9106ad7 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine 
Date: Fri, 6 Feb 2026 16:17:45 +0000
Subject: [PATCH 30/48] [CI] Update transport version definitions

---
 .../definitions/referable/inference_mixedbread_added.csv        | 2 +-
 server/src/main/resources/transport/upper_bounds/9.4.csv        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv b/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
index 8f5d4af46d0c6..36c3efe7e7b78 100644
--- a/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
+++ b/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
@@ -1 +1 @@
-9273000
+9277000
diff --git a/server/src/main/resources/transport/upper_bounds/9.4.csv b/server/src/main/resources/transport/upper_bounds/9.4.csv
index 3f1b8b5e9111b..5f6246bccce50 100644
--- a/server/src/main/resources/transport/upper_bounds/9.4.csv
+++ b/server/src/main/resources/transport/upper_bounds/9.4.csv
@@ -1 +1 @@
-multi_get_split_shard_count_summary,9276000
+inference_mixedbread_added,9277000

From e840607a73d7b9585546e70241d23cc7d336eb78 Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Fri, 6 Feb 2026 21:35:43 +0100
Subject: [PATCH 31/48] Address comments

---
 .../services/mixedbread/MixedbreadService.java       | 12 +++---------
 .../rerank/MixedbreadRerankServiceSettings.java      | 11 ++++-------
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
index 8ef0050dff63e..51dba4298ba38 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
@@ -123,14 +123,12 @@ public void parseRequestConfig(
             Map serviceSettingsMap = ServiceUtils.removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS);
             Map taskSettingsMap = ServiceUtils.removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS);
 
-            ChunkingSettings chunkingSettings = null;
-
             MixedbreadModel model = createModel(
                 inferenceEntityId,
                 taskType,
                 serviceSettingsMap,
                 taskSettingsMap,
-                chunkingSettings,
+                null,
                 serviceSettingsMap,
                 ConfigurationParseContext.REQUEST
             );
@@ -208,14 +206,12 @@ public MixedbreadModel parsePersistedConfigWithSecrets(
         Map taskSettingsMap = ServiceUtils.removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS);
         Map secretSettingsMap = ServiceUtils.removeFromMapOrThrowIfNull(secrets, ModelSecrets.SECRET_SETTINGS);
 
-        ChunkingSettings chunkingSettings = null;
-
         return parsePersistedConfigWithSecrets(
             inferenceEntityId,
             taskType,
             serviceSettingsMap,
             taskSettingsMap,
-            chunkingSettings,
+            null,
             secretSettingsMap
         );
     }
@@ -236,9 +232,7 @@ public MixedbreadModel parsePersistedConfig(String inferenceEntityId, TaskType t
         Map serviceSettingsMap = ServiceUtils.removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS);
         Map taskSettingsMap = ServiceUtils.removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS);
 
-        ChunkingSettings chunkingSettings = null;
-
-        return parsePersistedConfigWithSecrets(inferenceEntityId, taskType, serviceSettingsMap, taskSettingsMap, chunkingSettings, null);
+        return parsePersistedConfigWithSecrets(inferenceEntityId, taskType, serviceSettingsMap, taskSettingsMap, null, null);
     }
 
     @Override
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java
index 9a2e1bcf4657b..7fc9547d334ac 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettings.java
@@ -60,12 +60,12 @@ public static MixedbreadRerankServiceSettings fromMap(Map map, C
     private final RateLimitSettings rateLimitSettings;
 
     public MixedbreadRerankServiceSettings(String modelId, @Nullable RateLimitSettings rateLimitSettings) {
-        this.modelId = modelId;
+        this.modelId = Objects.requireNonNull(modelId);
         this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS);
     }
 
     public MixedbreadRerankServiceSettings(StreamInput in) throws IOException {
-        this.modelId = in.readOptionalString();
+        this.modelId = in.readString();
         this.rateLimitSettings = new RateLimitSettings(in);
     }
 
@@ -96,10 +96,7 @@ public boolean supportsVersion(TransportVersion version) {
 
     @Override
     protected XContentBuilder toXContentFragmentOfExposedFields(XContentBuilder builder, Params params) throws IOException {
-        if (modelId != null) {
-            builder.field(MODEL_ID, modelId);
-        }
-
+        builder.field(MODEL_ID, modelId);
         rateLimitSettings.toXContent(builder, params);
 
         return builder;
@@ -118,7 +115,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
 
     @Override
     public void writeTo(StreamOutput out) throws IOException {
-        out.writeOptionalString(modelId);
+        out.writeString(modelId);
         rateLimitSettings.writeTo(out);
     }
 

From 358d3afbe4d04f1558e59e9d76d64409c732adbe Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Fri, 6 Feb 2026 22:18:32 +0100
Subject: [PATCH 32/48] Address comments

---
 .../inference/services/mixedbread/MixedbreadService.java | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
index 51dba4298ba38..32ea93428ce27 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
@@ -206,14 +206,7 @@ public MixedbreadModel parsePersistedConfigWithSecrets(
         Map taskSettingsMap = ServiceUtils.removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS);
         Map secretSettingsMap = ServiceUtils.removeFromMapOrThrowIfNull(secrets, ModelSecrets.SECRET_SETTINGS);
 
-        return parsePersistedConfigWithSecrets(
-            inferenceEntityId,
-            taskType,
-            serviceSettingsMap,
-            taskSettingsMap,
-            null,
-            secretSettingsMap
-        );
+        return parsePersistedConfigWithSecrets(inferenceEntityId, taskType, serviceSettingsMap, taskSettingsMap, null, secretSettingsMap);
     }
 
     @Override

From 490cf6ed73d2cbe7a756ac9e6db782a26e64f81b Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Sat, 7 Feb 2026 21:07:31 +0100
Subject: [PATCH 33/48] Add parameterized tests

---
 .../mixedbread/MixedbreadService.java         |   6 +-
 ...erviceParameterizedModelCreationTests.java |  16 ++
 ...breadServiceParameterizedParsingTests.java |  16 ++
 .../mixedbread/MixedbreadServiceTests.java    | 151 +++++++++++++++++-
 4 files changed, 184 insertions(+), 5 deletions(-)
 create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceParameterizedModelCreationTests.java
 create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceParameterizedParsingTests.java

diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
index 32ea93428ce27..f34988dfa95eb 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
@@ -79,9 +79,11 @@ public class MixedbreadService extends SenderService implements RerankingInferen
     );
 
     /**
-     * Apart from v1 all other models have a context length of at least 8k.
+     * Apart from v1, all other models have a context length of up to 32k tokens.
+     * Using the conversion in which 8k tokens were converted into 5500 words, 32k tokens
+     * correspond to 4 * 5500 = 22000 words, that's why the default window size is set to 22000.
      */
-    private static final int DEFAULT_RERANKER_INPUT_SIZE_WORDS = 8000;
+    private static final int DEFAULT_RERANKER_INPUT_SIZE_WORDS = 22000;
 
     private static final Map> MODEL_CREATORS = Map.of(
         TaskType.RERANK,
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceParameterizedModelCreationTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceParameterizedModelCreationTests.java
new file mode 100644
index 0000000000000..7af72c6db4589
--- /dev/null
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceParameterizedModelCreationTests.java
@@ -0,0 +1,16 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.mixedbread;
+
+import org.elasticsearch.xpack.inference.services.AbstractInferenceServiceParameterizedModelCreationTests;
+
+public class MixedbreadServiceParameterizedModelCreationTests extends AbstractInferenceServiceParameterizedModelCreationTests {
+    public MixedbreadServiceParameterizedModelCreationTests(TestCase testCase) {
+        super(MixedbreadServiceTests.createTestConfiguration(), testCase);
+    }
+}
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceParameterizedParsingTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceParameterizedParsingTests.java
new file mode 100644
index 0000000000000..1b90a6a0dc0c5
--- /dev/null
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceParameterizedParsingTests.java
@@ -0,0 +1,16 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.mixedbread;
+
+import org.elasticsearch.xpack.inference.services.AbstractInferenceServiceParameterizedParsingTests;
+
+public class MixedbreadServiceParameterizedParsingTests extends AbstractInferenceServiceParameterizedParsingTests {
+    public MixedbreadServiceParameterizedParsingTests(TestCase testCase) {
+        super(MixedbreadServiceTests.createTestConfiguration(), testCase);
+    }
+}
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
index b0cd2b6c5b1f3..f5b3384d0dd20 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
@@ -15,12 +15,16 @@
 import org.elasticsearch.common.ValidationException;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.settings.SecureString;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentHelper;
+import org.elasticsearch.inference.EmptyTaskSettings;
 import org.elasticsearch.inference.InferenceService;
 import org.elasticsearch.inference.InferenceServiceConfiguration;
 import org.elasticsearch.inference.InferenceServiceResults;
 import org.elasticsearch.inference.Model;
+import org.elasticsearch.inference.ModelConfigurations;
+import org.elasticsearch.inference.ModelSecrets;
 import org.elasticsearch.inference.RerankingInferenceService;
 import org.elasticsearch.inference.TaskType;
 import org.elasticsearch.test.http.MockResponse;
@@ -33,11 +37,16 @@
 import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender;
 import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSenderTests;
 import org.elasticsearch.xpack.inference.logging.ThrottlerManager;
-import org.elasticsearch.xpack.inference.services.InferenceServiceTestCase;
+import org.elasticsearch.xpack.inference.services.AbstractInferenceServiceTests;
+import org.elasticsearch.xpack.inference.services.ConfigurationParseContext;
+import org.elasticsearch.xpack.inference.services.SenderService;
 import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModel;
 import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankModelTests;
 import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankServiceSettings;
+import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankServiceSettingsTests;
 import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettings;
+import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettingsTests;
+import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings;
 import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
 import org.hamcrest.CoreMatchers;
 import org.hamcrest.Matchers;
@@ -45,11 +54,13 @@
 import org.junit.Before;
 
 import java.io.IOException;
+import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
 import static org.elasticsearch.common.xcontent.XContentHelper.toXContent;
+import static org.elasticsearch.inference.TaskType.COMPLETION;
 import static org.elasticsearch.inference.TaskType.RERANK;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertToXContentEquivalent;
 import static org.elasticsearch.xpack.inference.Utils.getPersistedConfigMap;
@@ -69,7 +80,7 @@
 import static org.hamcrest.Matchers.instanceOf;
 import static org.mockito.Mockito.mock;
 
-public class MixedbreadServiceTests extends InferenceServiceTestCase {
+public class MixedbreadServiceTests extends AbstractInferenceServiceTests {
     public static final String UNKNOWN_SETTINGS_EXCEPTION =
         "Configuration contains settings [{extra_key=value}] unknown to the [mixedbread] service";
     public static final Boolean RETURN_DOCUMENTS_TRUE = true;
@@ -89,6 +100,140 @@ public class MixedbreadServiceTests extends InferenceServiceTestCase {
     private ThreadPool threadPool;
     private HttpClientManager clientManager;
 
+    public MixedbreadServiceTests() {
+        super(createTestConfiguration());
+    }
+
+    public static TestConfiguration createTestConfiguration() {
+        return new TestConfiguration.Builder(
+            new CommonConfig(RERANK, COMPLETION, EnumSet.of(RERANK)) {
+
+                @Override
+                protected SenderService createService(ThreadPool threadPool, HttpClientManager clientManager) {
+                    return MixedbreadServiceTests.createService(threadPool, clientManager);
+                }
+
+                @Override
+                protected Map createServiceSettingsMap(TaskType taskType) {
+                    return MixedbreadServiceTests.createServiceSettingsMap(taskType);
+                }
+
+                @Override
+                protected ModelConfigurations createModelConfigurations(TaskType taskType) {
+                    return switch (taskType) {
+                        case RERANK -> new ModelConfigurations(
+                            INFERENCE_ID_VALUE,
+                            taskType,
+                            MixedbreadService.NAME,
+                            MixedbreadRerankServiceSettings.fromMap(
+                                createServiceSettingsMap(taskType, ConfigurationParseContext.PERSISTENT),
+                                ConfigurationParseContext.PERSISTENT
+                            ),
+                            EmptyTaskSettings.INSTANCE
+                        );
+                        default -> throw new IllegalStateException("Unexpected value: " + taskType);
+                    };
+                }
+
+                @Override
+                protected ModelSecrets createModelSecrets() {
+                    return new ModelSecrets(DefaultSecretSettings.fromMap(createSecretSettingsMap()));
+                }
+
+                @Override
+                protected Map createServiceSettingsMap(TaskType taskType, ConfigurationParseContext parseContext) {
+                    return MixedbreadServiceTests.createServiceSettingsMap(taskType);
+                }
+
+                @Override
+                protected Map createTaskSettingsMap(TaskType taskType) {
+                    if (taskType.equals(RERANK)) {
+                        return MixedbreadRerankTaskSettingsTests.getTaskSettingsMap(null, null);
+                    } else {
+                        return createTaskSettingsMap();
+                    }
+                }
+
+                @Override
+                protected Map createTaskSettingsMap() {
+                    return new HashMap<>();
+                }
+
+                @Override
+                protected Map createSecretSettingsMap() {
+                    return MixedbreadServiceTests.createSecretSettingsMap();
+                }
+
+                @Override
+                protected void assertModel(Model model, TaskType taskType, boolean modelIncludesSecrets) {
+                    assertModel(model, taskType, modelIncludesSecrets, ConfigurationParseContext.REQUEST);
+                }
+
+                @Override
+                protected void assertModel(
+                    Model model,
+                    TaskType taskType,
+                    boolean modelIncludesSecrets,
+                    ConfigurationParseContext parseContext
+                ) {
+                    MixedbreadServiceTests.assertModel(model, taskType, modelIncludesSecrets, parseContext);
+                }
+
+                @Override
+                protected EnumSet supportedStreamingTasks() {
+                    return null;
+                }
+
+                @Override
+                protected void assertRerankerWindowSize(RerankingInferenceService rerankingInferenceService) {
+                    assertThat(rerankingInferenceService.rerankerWindowSize(MODEL_NAME_VALUE), Matchers.is(300));
+                }
+            }
+        )
+            .build();
+    }
+
+    private static void assertModel(Model model, TaskType taskType, boolean modelIncludesSecrets, ConfigurationParseContext parseContext) {
+        switch (taskType) {
+            case RERANK -> assertRerankModel(model, modelIncludesSecrets);
+            default -> fail("unexpected task type [" + taskType + "]");
+        }
+    }
+
+    private static MixedbreadModel assertCommonModelFields(Model model, boolean modelIncludesSecrets) {
+        assertThat(model, instanceOf(MixedbreadModel.class));
+
+        var mixedbreadModel = (MixedbreadModel) model;
+        assertThat(mixedbreadModel.getServiceSettings().modelId(), Matchers.is(MODEL_NAME_VALUE));
+        assertThat(mixedbreadModel.uri().toString(), Matchers.is(DEFAULT_RERANK_URL));
+        if (modelIncludesSecrets) {
+            assertThat(mixedbreadModel.getSecretSettings().apiKey(), Matchers.is(new SecureString(API_KEY.toCharArray())));
+        }
+        return mixedbreadModel;
+    }
+
+    private static void assertRerankModel(Model model, boolean modelIncludesSecrets) {
+        var mixedbreadModel = assertCommonModelFields(model, modelIncludesSecrets);
+        assertThat(mixedbreadModel.getTaskSettings(), Matchers.is(EmptyTaskSettings.INSTANCE));
+        assertThat(mixedbreadModel.getTaskType(), Matchers.is(RERANK));
+    }
+
+    public static SenderService createService(ThreadPool threadPool, HttpClientManager clientManager) {
+        var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager);
+        return new MixedbreadService(senderFactory, createWithEmptySettings(threadPool), mockClusterServiceEmpty());
+    }
+
+    private static Map createServiceSettingsMap(TaskType taskType) {
+        if (taskType == RERANK) {
+            return MixedbreadRerankServiceSettingsTests.getServiceSettingsMap(MODEL_NAME_VALUE, null);
+        }
+        return Map.of();
+    }
+
+    private static Map createSecretSettingsMap() {
+        return new HashMap<>(Map.of("api_key", API_KEY));
+    }
+
     @Before
     public void init() throws Exception {
         webServer.start();
@@ -841,6 +986,6 @@ public InferenceService createInferenceService() {
 
     @Override
     protected void assertRerankerWindowSize(RerankingInferenceService rerankingInferenceService) {
-        assertThat(rerankingInferenceService.rerankerWindowSize("any model"), is(8000));
+        assertThat(rerankingInferenceService.rerankerWindowSize("any model"), is(22000));
     }
 }

From d2e98122b5c1002d6c9c1dbc3e38558f4023601b Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Sat, 7 Feb 2026 22:36:59 +0100
Subject: [PATCH 34/48] Apply spotless

---
 .../mixedbread/MixedbreadServiceTests.java    | 141 +++++++++---------
 1 file changed, 69 insertions(+), 72 deletions(-)

diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
index f5b3384d0dd20..9eb4b06020f3a 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
@@ -105,92 +105,89 @@ public MixedbreadServiceTests() {
     }
 
     public static TestConfiguration createTestConfiguration() {
-        return new TestConfiguration.Builder(
-            new CommonConfig(RERANK, COMPLETION, EnumSet.of(RERANK)) {
+        return new TestConfiguration.Builder(new CommonConfig(RERANK, COMPLETION, EnumSet.of(RERANK)) {
 
-                @Override
-                protected SenderService createService(ThreadPool threadPool, HttpClientManager clientManager) {
-                    return MixedbreadServiceTests.createService(threadPool, clientManager);
-                }
+            @Override
+            protected SenderService createService(ThreadPool threadPool, HttpClientManager clientManager) {
+                return MixedbreadServiceTests.createService(threadPool, clientManager);
+            }
 
-                @Override
-                protected Map createServiceSettingsMap(TaskType taskType) {
-                    return MixedbreadServiceTests.createServiceSettingsMap(taskType);
-                }
+            @Override
+            protected Map createServiceSettingsMap(TaskType taskType) {
+                return MixedbreadServiceTests.createServiceSettingsMap(taskType);
+            }
 
-                @Override
-                protected ModelConfigurations createModelConfigurations(TaskType taskType) {
-                    return switch (taskType) {
-                        case RERANK -> new ModelConfigurations(
-                            INFERENCE_ID_VALUE,
-                            taskType,
-                            MixedbreadService.NAME,
-                            MixedbreadRerankServiceSettings.fromMap(
-                                createServiceSettingsMap(taskType, ConfigurationParseContext.PERSISTENT),
-                                ConfigurationParseContext.PERSISTENT
-                            ),
-                            EmptyTaskSettings.INSTANCE
-                        );
-                        default -> throw new IllegalStateException("Unexpected value: " + taskType);
-                    };
-                }
+            @Override
+            protected ModelConfigurations createModelConfigurations(TaskType taskType) {
+                return switch (taskType) {
+                    case RERANK -> new ModelConfigurations(
+                        INFERENCE_ID_VALUE,
+                        taskType,
+                        MixedbreadService.NAME,
+                        MixedbreadRerankServiceSettings.fromMap(
+                            createServiceSettingsMap(taskType, ConfigurationParseContext.PERSISTENT),
+                            ConfigurationParseContext.PERSISTENT
+                        ),
+                        EmptyTaskSettings.INSTANCE
+                    );
+                    default -> throw new IllegalStateException("Unexpected value: " + taskType);
+                };
+            }
 
-                @Override
-                protected ModelSecrets createModelSecrets() {
-                    return new ModelSecrets(DefaultSecretSettings.fromMap(createSecretSettingsMap()));
-                }
+            @Override
+            protected ModelSecrets createModelSecrets() {
+                return new ModelSecrets(DefaultSecretSettings.fromMap(createSecretSettingsMap()));
+            }
 
-                @Override
-                protected Map createServiceSettingsMap(TaskType taskType, ConfigurationParseContext parseContext) {
-                    return MixedbreadServiceTests.createServiceSettingsMap(taskType);
-                }
+            @Override
+            protected Map createServiceSettingsMap(TaskType taskType, ConfigurationParseContext parseContext) {
+                return MixedbreadServiceTests.createServiceSettingsMap(taskType);
+            }
 
-                @Override
-                protected Map createTaskSettingsMap(TaskType taskType) {
-                    if (taskType.equals(RERANK)) {
-                        return MixedbreadRerankTaskSettingsTests.getTaskSettingsMap(null, null);
-                    } else {
-                        return createTaskSettingsMap();
-                    }
+            @Override
+            protected Map createTaskSettingsMap(TaskType taskType) {
+                if (taskType.equals(RERANK)) {
+                    return MixedbreadRerankTaskSettingsTests.getTaskSettingsMap(null, null);
+                } else {
+                    return createTaskSettingsMap();
                 }
+            }
 
-                @Override
-                protected Map createTaskSettingsMap() {
-                    return new HashMap<>();
-                }
+            @Override
+            protected Map createTaskSettingsMap() {
+                return new HashMap<>();
+            }
 
-                @Override
-                protected Map createSecretSettingsMap() {
-                    return MixedbreadServiceTests.createSecretSettingsMap();
-                }
+            @Override
+            protected Map createSecretSettingsMap() {
+                return MixedbreadServiceTests.createSecretSettingsMap();
+            }
 
-                @Override
-                protected void assertModel(Model model, TaskType taskType, boolean modelIncludesSecrets) {
-                    assertModel(model, taskType, modelIncludesSecrets, ConfigurationParseContext.REQUEST);
-                }
+            @Override
+            protected void assertModel(Model model, TaskType taskType, boolean modelIncludesSecrets) {
+                assertModel(model, taskType, modelIncludesSecrets, ConfigurationParseContext.REQUEST);
+            }
 
-                @Override
-                protected void assertModel(
-                    Model model,
-                    TaskType taskType,
-                    boolean modelIncludesSecrets,
-                    ConfigurationParseContext parseContext
-                ) {
-                    MixedbreadServiceTests.assertModel(model, taskType, modelIncludesSecrets, parseContext);
-                }
+            @Override
+            protected void assertModel(
+                Model model,
+                TaskType taskType,
+                boolean modelIncludesSecrets,
+                ConfigurationParseContext parseContext
+            ) {
+                MixedbreadServiceTests.assertModel(model, taskType, modelIncludesSecrets, parseContext);
+            }
 
-                @Override
-                protected EnumSet supportedStreamingTasks() {
-                    return null;
-                }
+            @Override
+            protected EnumSet supportedStreamingTasks() {
+                return null;
+            }
 
-                @Override
-                protected void assertRerankerWindowSize(RerankingInferenceService rerankingInferenceService) {
-                    assertThat(rerankingInferenceService.rerankerWindowSize(MODEL_NAME_VALUE), Matchers.is(300));
-                }
+            @Override
+            protected void assertRerankerWindowSize(RerankingInferenceService rerankingInferenceService) {
+                assertThat(rerankingInferenceService.rerankerWindowSize(MODEL_NAME_VALUE), Matchers.is(300));
             }
-        )
-            .build();
+        }).build();
     }
 
     private static void assertModel(Model model, TaskType taskType, boolean modelIncludesSecrets, ConfigurationParseContext parseContext) {

From dc34522adc2b4958c67be178d4bfd2bfa0dc89d7 Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Tue, 10 Feb 2026 09:22:39 +0100
Subject: [PATCH 35/48] fix tests

---
 .../mixedbread/MixedbreadServiceTests.java    | 95 +++++++++++++------
 .../MixedbreadRerankServiceSettingsTests.java |  5 +-
 2 files changed, 68 insertions(+), 32 deletions(-)

diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
index 9eb4b06020f3a..c5f2d17c0a7ac 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
@@ -18,7 +18,6 @@
 import org.elasticsearch.common.settings.SecureString;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentHelper;
-import org.elasticsearch.inference.EmptyTaskSettings;
 import org.elasticsearch.inference.InferenceService;
 import org.elasticsearch.inference.InferenceServiceConfiguration;
 import org.elasticsearch.inference.InferenceServiceResults;
@@ -26,6 +25,8 @@
 import org.elasticsearch.inference.ModelConfigurations;
 import org.elasticsearch.inference.ModelSecrets;
 import org.elasticsearch.inference.RerankingInferenceService;
+import org.elasticsearch.inference.ServiceSettings;
+import org.elasticsearch.inference.TaskSettings;
 import org.elasticsearch.inference.TaskType;
 import org.elasticsearch.test.http.MockResponse;
 import org.elasticsearch.test.http.MockWebServer;
@@ -58,6 +59,7 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 
 import static org.elasticsearch.common.xcontent.XContentHelper.toXContent;
 import static org.elasticsearch.inference.TaskType.COMPLETION;
@@ -86,6 +88,7 @@ public class MixedbreadServiceTests extends AbstractInferenceServiceTests {
     public static final Boolean RETURN_DOCUMENTS_TRUE = true;
     public static final Boolean RETURN_DOCUMENTS_FALSE = false;
     public static final String DEFAULT_RERANK_URL = "https://api.mixedbread.com/v1/reranking";
+    public static final String CUSTOM_URL = "https://custom.url.com/v1/rerank";
 
     private static final String INFERENCE_ID_VALUE = "id";
     private static final String MODEL_NAME_VALUE = "modelName";
@@ -114,7 +117,7 @@ protected SenderService createService(ThreadPool threadPool, HttpClientManager c
 
             @Override
             protected Map createServiceSettingsMap(TaskType taskType) {
-                return MixedbreadServiceTests.createServiceSettingsMap(taskType);
+                return MixedbreadRerankServiceSettingsTests.getServiceSettingsMap(MODEL_NAME_VALUE, null);
             }
 
             @Override
@@ -128,7 +131,15 @@ protected ModelConfigurations createModelConfigurations(TaskType taskType) {
                             createServiceSettingsMap(taskType, ConfigurationParseContext.PERSISTENT),
                             ConfigurationParseContext.PERSISTENT
                         ),
-                        EmptyTaskSettings.INSTANCE
+                        MixedbreadRerankTaskSettings.EMPTY_SETTINGS
+                    );
+                    // Completion is not supported, but in order to test unsupported task types it is included here
+                    case COMPLETION -> new ModelConfigurations(
+                        INFERENCE_ID_VALUE,
+                        taskType,
+                        MixedbreadService.NAME,
+                        mock(ServiceSettings.class),
+                        mock(TaskSettings.class)
                     );
                     default -> throw new IllegalStateException("Unexpected value: " + taskType);
                 };
@@ -141,16 +152,15 @@ protected ModelSecrets createModelSecrets() {
 
             @Override
             protected Map createServiceSettingsMap(TaskType taskType, ConfigurationParseContext parseContext) {
-                return MixedbreadServiceTests.createServiceSettingsMap(taskType);
+                return MixedbreadServiceTests.createServiceSettingsMap(taskType, parseContext);
             }
 
             @Override
             protected Map createTaskSettingsMap(TaskType taskType) {
                 if (taskType.equals(RERANK)) {
                     return MixedbreadRerankTaskSettingsTests.getTaskSettingsMap(null, null);
-                } else {
-                    return createTaskSettingsMap();
                 }
+                return createTaskSettingsMap();
             }
 
             @Override
@@ -165,35 +175,37 @@ protected Map createSecretSettingsMap() {
 
             @Override
             protected void assertModel(Model model, TaskType taskType, boolean modelIncludesSecrets) {
-                assertModel(model, taskType, modelIncludesSecrets, ConfigurationParseContext.REQUEST);
-            }
-
-            @Override
-            protected void assertModel(
-                Model model,
-                TaskType taskType,
-                boolean modelIncludesSecrets,
-                ConfigurationParseContext parseContext
-            ) {
-                MixedbreadServiceTests.assertModel(model, taskType, modelIncludesSecrets, parseContext);
+                MixedbreadServiceTests.assertModel(model, taskType, modelIncludesSecrets);
             }
 
             @Override
             protected EnumSet supportedStreamingTasks() {
-                return null;
+                return EnumSet.noneOf(TaskType.class);
             }
 
             @Override
             protected void assertRerankerWindowSize(RerankingInferenceService rerankingInferenceService) {
-                assertThat(rerankingInferenceService.rerankerWindowSize(MODEL_NAME_VALUE), Matchers.is(300));
+                assertThat(rerankingInferenceService.rerankerWindowSize(MODEL_NAME_VALUE), Matchers.is(22000));
             }
         }).build();
     }
 
+    private static void assertModel(Model model, TaskType taskType, boolean modelIncludesSecrets) {
+        if (Objects.requireNonNull(taskType) == RERANK) {
+            assertRerankModel(model, modelIncludesSecrets);
+        } else {
+            fail("unexpected task type [" + taskType + "]");
+        }
+    }
+
+    @Override
+    public void testParseRequestConfig_CreatesACompletionModel() {}
+
     private static void assertModel(Model model, TaskType taskType, boolean modelIncludesSecrets, ConfigurationParseContext parseContext) {
-        switch (taskType) {
-            case RERANK -> assertRerankModel(model, modelIncludesSecrets);
-            default -> fail("unexpected task type [" + taskType + "]");
+        if (Objects.requireNonNull(taskType) == RERANK) {
+            assertRerankModel(model, modelIncludesSecrets);
+        } else {
+            fail("unexpected task type [" + taskType + "]");
         }
     }
 
@@ -202,7 +214,6 @@ private static MixedbreadModel assertCommonModelFields(Model model, boolean mode
 
         var mixedbreadModel = (MixedbreadModel) model;
         assertThat(mixedbreadModel.getServiceSettings().modelId(), Matchers.is(MODEL_NAME_VALUE));
-        assertThat(mixedbreadModel.uri().toString(), Matchers.is(DEFAULT_RERANK_URL));
         if (modelIncludesSecrets) {
             assertThat(mixedbreadModel.getSecretSettings().apiKey(), Matchers.is(new SecureString(API_KEY.toCharArray())));
         }
@@ -211,7 +222,7 @@ private static MixedbreadModel assertCommonModelFields(Model model, boolean mode
 
     private static void assertRerankModel(Model model, boolean modelIncludesSecrets) {
         var mixedbreadModel = assertCommonModelFields(model, modelIncludesSecrets);
-        assertThat(mixedbreadModel.getTaskSettings(), Matchers.is(EmptyTaskSettings.INSTANCE));
+        assertThat(mixedbreadModel.getTaskSettings(), Matchers.is(MixedbreadRerankTaskSettings.EMPTY_SETTINGS));
         assertThat(mixedbreadModel.getTaskType(), Matchers.is(RERANK));
     }
 
@@ -220,17 +231,43 @@ public static SenderService createService(ThreadPool threadPool, HttpClientManag
         return new MixedbreadService(senderFactory, createWithEmptySettings(threadPool), mockClusterServiceEmpty());
     }
 
-    private static Map createServiceSettingsMap(TaskType taskType) {
-        if (taskType == RERANK) {
-            return MixedbreadRerankServiceSettingsTests.getServiceSettingsMap(MODEL_NAME_VALUE, null);
-        }
-        return Map.of();
+    private static Map createServiceSettingsMap(TaskType taskType, ConfigurationParseContext parseContext) {
+        return MixedbreadRerankServiceSettingsTests.getServiceSettingsMap(MODEL_NAME_VALUE, null);
     }
 
     private static Map createSecretSettingsMap() {
         return new HashMap<>(Map.of("api_key", API_KEY));
     }
 
+    public void testBuildModelFromConfigAndSecrets_UnsupportedTaskType() throws IOException {
+        var modelConfigurations = new ModelConfigurations(
+            INFERENCE_ID_VALUE,
+            TaskType.COMPLETION,
+            MixedbreadService.NAME,
+            mock(ServiceSettings.class)
+        );
+        try (var inferenceService = createInferenceService()) {
+            var thrownException = expectThrows(
+                ElasticsearchStatusException.class,
+                () -> inferenceService.buildModelFromConfigAndSecrets(modelConfigurations, mock(ModelSecrets.class))
+            );
+            assertThat(
+                thrownException.getMessage(),
+                CoreMatchers.is(
+                    org.elasticsearch.core.Strings.format(
+                        """
+                            Failed to parse stored model [%s] for [%s] service, error: [The [%s] service does not support task type [%s]]. \
+                            Please delete and add the service again""",
+                        INFERENCE_ID_VALUE,
+                        MixedbreadService.NAME,
+                        MixedbreadService.NAME,
+                        TaskType.COMPLETION
+                    )
+                )
+            );
+        }
+    }
+
     @Before
     public void init() throws Exception {
         webServer.start();
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java
index 131e904779c68..c9353d4a0bc61 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/rerank/MixedbreadRerankServiceSettingsTests.java
@@ -34,7 +34,7 @@ public static MixedbreadRerankServiceSettings createRandom() {
     }
 
     public static MixedbreadRerankServiceSettings createRandom(@Nullable RateLimitSettings rateLimitSettings) {
-        return new MixedbreadRerankServiceSettings(randomAlphaOfLengthOrNull(10), rateLimitSettings);
+        return new MixedbreadRerankServiceSettings(randomAlphaOfLength(10), rateLimitSettings);
     }
 
     public void testToXContent_WritesAllValues() throws IOException {
@@ -82,11 +82,10 @@ protected MixedbreadRerankServiceSettings mutateInstance(MixedbreadRerankService
         var modelId = instance.modelId();
         var rateLimitSettings = instance.rateLimitSettings();
         switch (randomInt(1)) {
-            case 0 -> modelId = randomValueOtherThan(modelId, () -> randomAlphaOfLengthOrNull(10));
+            case 0 -> modelId = randomValueOtherThan(modelId, () -> randomAlphaOfLength(8));
             case 1 -> rateLimitSettings = randomValueOtherThan(rateLimitSettings, RateLimitSettingsTests::createRandom);
             default -> throw new AssertionError("Illegal randomisation branch");
         }
-
         return new MixedbreadRerankServiceSettings(modelId, rateLimitSettings);
     }
 

From 2ca9c7e67ca1cef887c883e6133d8e583c8fcce3 Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Tue, 10 Feb 2026 09:57:00 +0100
Subject: [PATCH 36/48] update transport version

---
 .../definitions/referable/inference_mixedbread_added.csv        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv b/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
index 36c3efe7e7b78..c87f7a787c8d8 100644
--- a/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
+++ b/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
@@ -1 +1 @@
-9277000
+9279000

From 37b362c9dff91fcb112ee4f4cefe67ea7e77f21b Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Tue, 10 Feb 2026 11:36:38 +0100
Subject: [PATCH 37/48] add transport version and upper bound

---
 .../definitions/referable/inference_mixedbread_added.csv        | 2 +-
 server/src/main/resources/transport/upper_bounds/9.4.csv        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv b/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
index c87f7a787c8d8..13706b229b5d4 100644
--- a/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
+++ b/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
@@ -1 +1 @@
-9279000
+9280000
diff --git a/server/src/main/resources/transport/upper_bounds/9.4.csv b/server/src/main/resources/transport/upper_bounds/9.4.csv
index 98fdb11919e86..c0924f9e0bab9 100644
--- a/server/src/main/resources/transport/upper_bounds/9.4.csv
+++ b/server/src/main/resources/transport/upper_bounds/9.4.csv
@@ -1 +1 @@
-find_structure_request_should_parse_recursively,9279000
+inference_mixedbread_added,9280000

From 4547b97970735a39010f53048d4bf327014374b6 Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Thu, 12 Feb 2026 00:41:22 +0100
Subject: [PATCH 38/48] Address comments

---
 .../mixedbread/MixedbreadService.java         | 25 ++++++++-----------
 .../AbstractInferenceServiceTests.java        |  1 +
 .../mixedbread/MixedbreadServiceTests.java    | 12 ---------
 3 files changed, 11 insertions(+), 27 deletions(-)

diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
index f34988dfa95eb..d4adfe2acb016 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadService.java
@@ -61,15 +61,17 @@ public class MixedbreadService extends SenderService implements RerankingInferen
 
     private static final EnumSet SUPPORTED_TASK_TYPES = EnumSet.of(TaskType.RERANK);
 
+    /**
+     * The {@link #rerankerWindowSize(String modelId)} method returns the size in words, not in tokens, so token counts are
+     * translated to words by multiplying by 0.75 and rounding down.
+     * <p>
+     * The context window size for v1 models is 512 tokens / 300 words.
+     * For v2 models it ranges from 8k tokens / 5500 words to 32k tokens / 22000 words.
+     * <a href="https://github.com/elastic/elasticsearch/pull/132169">tokens to words conversion reference</a>
+     */
+    private static final int DEFAULT_RERANKER_INPUT_SIZE_WORDS = 22000;
+
     private static final Map RERANKERS_INPUT_SIZE = Map.of(
-        // Windows size.
-        // The v1 models: 512
-        // The v2 models: at least 8k
-        // https://www.mixedbread.com/docs/models/reranking/mxbai-rerank-large-v1
-
-        // rerankerWindowSize() method returns the size in words, not in tokens, so we'll need to translate
-        // tokens to words by multiplying by 0.75 and rounding down
-        // https://github.com/elastic/elasticsearch/pull/132169
         "mixedbread-ai/mxbai-rerank-xsmall-v1",
         300,
         "mixedbread-ai/mxbai-rerank-base-v1",
@@ -78,13 +80,6 @@ public class MixedbreadService extends SenderService implements RerankingInferen
         300
     );
 
-    /**
-     * Apart from v1 all other models have a context length of up to 32k.
-     * Here
-     * 8k tokens were converted into 5500 words, that's why the default window size is set to 22000
-     */
-    private static final int DEFAULT_RERANKER_INPUT_SIZE_WORDS = 22000;
-
     private static final Map> MODEL_CREATORS = Map.of(
         TaskType.RERANK,
         new MixedbreadRerankModelCreator()
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/AbstractInferenceServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/AbstractInferenceServiceTests.java
index dc00aa84e6880..ad5e9b1dff810 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/AbstractInferenceServiceTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/AbstractInferenceServiceTests.java
@@ -93,6 +93,7 @@ public void testParseRequestConfig_CreatesAnEmbeddingsModelWhenChunkingSettingsP
 
     public void testParseRequestConfig_CreatesACompletionModel() throws Exception {
         var parseRequestConfigTestConfig = testConfiguration.commonConfig();
+        Assume.assumeTrue(testConfiguration.commonConfig().supportedTaskTypes().contains(TaskType.COMPLETION));
 
         try (var service = parseRequestConfigTestConfig.createService(threadPool, clientManager)) {
             var config = getRequestConfigMap(
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
index c5f2d17c0a7ac..a780e77af11e6 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mixedbread/MixedbreadServiceTests.java
@@ -88,7 +88,6 @@ public class MixedbreadServiceTests extends AbstractInferenceServiceTests {
     public static final Boolean RETURN_DOCUMENTS_TRUE = true;
     public static final Boolean RETURN_DOCUMENTS_FALSE = false;
     public static final String DEFAULT_RERANK_URL = "https://api.mixedbread.com/v1/reranking";
-    public static final String CUSTOM_URL = "https://custom.url.com/v1/rerank";
 
     private static final String INFERENCE_ID_VALUE = "id";
     private static final String MODEL_NAME_VALUE = "modelName";
@@ -198,17 +197,6 @@ private static void assertModel(Model model, TaskType taskType, boolean modelInc
         }
     }
 
-    @Override
-    public void testParseRequestConfig_CreatesACompletionModel() {}
-
-    private static void assertModel(Model model, TaskType taskType, boolean modelIncludesSecrets, ConfigurationParseContext parseContext) {
-        if (Objects.requireNonNull(taskType) == RERANK) {
-            assertRerankModel(model, modelIncludesSecrets);
-        } else {
-            fail("unexpected task type [" + taskType + "]");
-        }
-    }
-
     private static MixedbreadModel assertCommonModelFields(Model model, boolean modelIncludesSecrets) {
         assertThat(model, instanceOf(MixedbreadModel.class));
 

From d69ac129f051c964db88614da4cbcacb714af302 Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Thu, 12 Feb 2026 00:46:04 +0100
Subject: [PATCH 39/48] Update transport version

---
 .../definitions/referable/inference_mixedbread_added.csv        | 2 +-
 server/src/main/resources/transport/upper_bounds/9.4.csv        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv b/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
index 13706b229b5d4..eb27c20e6c5e5 100644
--- a/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
+++ b/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
@@ -1 +1 @@
-9280000
+9282000
diff --git a/server/src/main/resources/transport/upper_bounds/9.4.csv b/server/src/main/resources/transport/upper_bounds/9.4.csv
index 19ed5d7477ee1..2a4cbabed1801 100644
--- a/server/src/main/resources/transport/upper_bounds/9.4.csv
+++ b/server/src/main/resources/transport/upper_bounds/9.4.csv
@@ -1 +1 @@
-filter_function_name,9281000
+inference_mixedbread_added,9282000

From 73b1695b79ceda682a1b35fcb010dcc84df43fcd Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Thu, 12 Feb 2026 16:24:17 +0100
Subject: [PATCH 40/48] Use 'declareStringOrNull' for the input field

---
 .../response/MixedbreadRerankResponseEntity.java           | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
index deffd05aa2563..ec71852f442c7 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
@@ -132,12 +132,7 @@ private record ResultItem(int index, float relevanceScore, @Nullable Document do
         static {
             PARSER.declareInt(constructorArg(), new ParseField("index"));
             PARSER.declareFloat(constructorArg(), new ParseField("score"));
-            PARSER.declareField(
-                optionalConstructorArg(),
-                (p, c) -> parseDocument(p),
-                new ParseField("input"),
-                ObjectParser.ValueType.VALUE
-            );
+            PARSER.declareStringOrNull(constructorArg(), new ParseField("input"));
         }
     }
 

From 368038b05a1b9098bb6802626c11a30fd75f1800 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine 
Date: Thu, 12 Feb 2026 15:32:37 +0000
Subject: [PATCH 41/48] [CI] Auto commit changes from spotless

---
 .../mixedbread/response/MixedbreadRerankResponseEntity.java     | 2 --
 1 file changed, 2 deletions(-)

diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
index ec71852f442c7..444d5fede28bd 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
@@ -10,7 +10,6 @@
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.inference.InferenceServiceResults;
 import org.elasticsearch.xcontent.ConstructingObjectParser;
-import org.elasticsearch.xcontent.ObjectParser;
 import org.elasticsearch.xcontent.ParseField;
 import org.elasticsearch.xcontent.XContentFactory;
 import org.elasticsearch.xcontent.XContentParseException;
@@ -24,7 +23,6 @@
 import java.util.List;
 
 import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;
-import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
 
 public class MixedbreadRerankResponseEntity {
 

From 2124aa920215b34c893fb6255ded20262235c655 Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Thu, 12 Feb 2026 16:46:02 +0100
Subject: [PATCH 42/48] Correct MixedbreadRerankResponseEntity

---
 .../MixedbreadRerankResponseEntity.java       | 40 ++-----------------
 1 file changed, 3 insertions(+), 37 deletions(-)

diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
index ec71852f442c7..7d5bd93797f3b 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
@@ -10,11 +10,8 @@
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.inference.InferenceServiceResults;
 import org.elasticsearch.xcontent.ConstructingObjectParser;
-import org.elasticsearch.xcontent.ObjectParser;
 import org.elasticsearch.xcontent.ParseField;
 import org.elasticsearch.xcontent.XContentFactory;
-import org.elasticsearch.xcontent.XContentParseException;
-import org.elasticsearch.xcontent.XContentParser;
 import org.elasticsearch.xcontent.XContentParserConfiguration;
 import org.elasticsearch.xcontent.XContentType;
 import org.elasticsearch.xpack.core.inference.results.RankedDocsResults;
@@ -24,7 +21,6 @@
 import java.util.List;
 
 import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;
-import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
 
 public class MixedbreadRerankResponseEntity {
 
@@ -114,7 +110,7 @@ public RankedDocsResults toRankedDocsResults() {
                     item -> new RankedDocsResults.RankedDoc(
                         item.index(),
                         item.relevanceScore(),
-                        item.document() != null ? item.document().text() : null
+                        item.document() != null ? item.document() : null
                     )
                 )
                 .toList();
@@ -122,11 +118,11 @@ public RankedDocsResults toRankedDocsResults() {
         }
     }
 
-    private record ResultItem(int index, float relevanceScore, @Nullable Document document) {
+    private record ResultItem(int index, float relevanceScore, @Nullable String document) {
         public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(
             ResultItem.class.getSimpleName(),
             true,
-            args -> new ResultItem((Integer) args[0], (Float) args[1], (Document) args[2])
+            args -> new ResultItem((Integer) args[0], (Float) args[1], (String) args[2])
         );
 
         static {
@@ -135,34 +131,4 @@ private record ResultItem(int index, float relevanceScore, @Nullable Document do
             PARSER.declareStringOrNull(constructorArg(), new ParseField("input"));
         }
     }
-
-    private record Document(String text) {}
-
-    private static Document parseDocument(XContentParser parser) throws IOException {
-        var token = parser.currentToken();
-        if (token == XContentParser.Token.START_OBJECT) {
-            return new Document(DocumentObject.PARSER.apply(parser, null).text());
-        } else if (token == XContentParser.Token.VALUE_STRING) {
-            return new Document(parser.text());
-        } else if (token == XContentParser.Token.VALUE_NULL) {
-            return new Document(null);
-        }
-
-        throw new XContentParseException(
-            parser.getTokenLocation(),
-            "Expected an object, string or null for document field, but got: " + token
-        );
-    }
-
-    private record DocumentObject(String text) {
-        public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(
-            DocumentObject.class.getSimpleName(),
-            true,
-            args -> new DocumentObject((String) args[0])
-        );
-
-        static {
-            PARSER.declareString(constructorArg(), new ParseField("text"));
-        }
-    }
 }

From 9de451c29b42e0a9ad97f06ad470d8362f88e16e Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Thu, 12 Feb 2026 16:46:15 +0100
Subject: [PATCH 43/48] Revert "Correct MixedbreadRerankResponseEntity"

This reverts commit 2124aa920215b34c893fb6255ded20262235c655.
---
 .../MixedbreadRerankResponseEntity.java       | 40 +++++++++++++++++--
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
index 7d5bd93797f3b..ec71852f442c7 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
@@ -10,8 +10,11 @@
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.inference.InferenceServiceResults;
 import org.elasticsearch.xcontent.ConstructingObjectParser;
+import org.elasticsearch.xcontent.ObjectParser;
 import org.elasticsearch.xcontent.ParseField;
 import org.elasticsearch.xcontent.XContentFactory;
+import org.elasticsearch.xcontent.XContentParseException;
+import org.elasticsearch.xcontent.XContentParser;
 import org.elasticsearch.xcontent.XContentParserConfiguration;
 import org.elasticsearch.xcontent.XContentType;
 import org.elasticsearch.xpack.core.inference.results.RankedDocsResults;
@@ -21,6 +24,7 @@
 import java.util.List;
 
 import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;
+import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
 
 public class MixedbreadRerankResponseEntity {
 
@@ -110,7 +114,7 @@ public RankedDocsResults toRankedDocsResults() {
                     item -> new RankedDocsResults.RankedDoc(
                         item.index(),
                         item.relevanceScore(),
-                        item.document() != null ? item.document() : null
+                        item.document() != null ? item.document().text() : null
                     )
                 )
                 .toList();
@@ -118,11 +122,11 @@ public RankedDocsResults toRankedDocsResults() {
         }
     }
 
-    private record ResultItem(int index, float relevanceScore, @Nullable String document) {
+    private record ResultItem(int index, float relevanceScore, @Nullable Document document) {
         public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(
             ResultItem.class.getSimpleName(),
             true,
-            args -> new ResultItem((Integer) args[0], (Float) args[1], (String) args[2])
+            args -> new ResultItem((Integer) args[0], (Float) args[1], (Document) args[2])
         );
 
         static {
@@ -131,4 +135,34 @@ private record ResultItem(int index, float relevanceScore, @Nullable String docu
             PARSER.declareStringOrNull(constructorArg(), new ParseField("input"));
         }
     }
+
+    private record Document(String text) {}
+
+    private static Document parseDocument(XContentParser parser) throws IOException {
+        var token = parser.currentToken();
+        if (token == XContentParser.Token.START_OBJECT) {
+            return new Document(DocumentObject.PARSER.apply(parser, null).text());
+        } else if (token == XContentParser.Token.VALUE_STRING) {
+            return new Document(parser.text());
+        } else if (token == XContentParser.Token.VALUE_NULL) {
+            return new Document(null);
+        }
+
+        throw new XContentParseException(
+            parser.getTokenLocation(),
+            "Expected an object, string or null for document field, but got: " + token
+        );
+    }
+
+    private record DocumentObject(String text) {
+        public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(
+            DocumentObject.class.getSimpleName(),
+            true,
+            args -> new DocumentObject((String) args[0])
+        );
+
+        static {
+            PARSER.declareString(constructorArg(), new ParseField("text"));
+        }
+    }
 }

From 89f6d6cd97c9a8f9c4a2d324b3536f74e357c988 Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Thu, 12 Feb 2026 16:52:24 +0100
Subject: [PATCH 44/48] Correct MixedbreadRerankResponseEntity

---
 .../MixedbreadRerankResponseEntity.java       | 38 ++-----------------
 1 file changed, 3 insertions(+), 35 deletions(-)

diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
index 444d5fede28bd..7d5bd93797f3b 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
@@ -12,8 +12,6 @@
 import org.elasticsearch.xcontent.ConstructingObjectParser;
 import org.elasticsearch.xcontent.ParseField;
 import org.elasticsearch.xcontent.XContentFactory;
-import org.elasticsearch.xcontent.XContentParseException;
-import org.elasticsearch.xcontent.XContentParser;
 import org.elasticsearch.xcontent.XContentParserConfiguration;
 import org.elasticsearch.xcontent.XContentType;
 import org.elasticsearch.xpack.core.inference.results.RankedDocsResults;
@@ -112,7 +110,7 @@ public RankedDocsResults toRankedDocsResults() {
                     item -> new RankedDocsResults.RankedDoc(
                         item.index(),
                         item.relevanceScore(),
-                        item.document() != null ? item.document().text() : null
+                        item.document() != null ? item.document() : null
                     )
                 )
                 .toList();
@@ -120,11 +118,11 @@ public RankedDocsResults toRankedDocsResults() {
         }
     }
 
-    private record ResultItem(int index, float relevanceScore, @Nullable Document document) {
+    private record ResultItem(int index, float relevanceScore, @Nullable String document) {
         public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(
             ResultItem.class.getSimpleName(),
             true,
-            args -> new ResultItem((Integer) args[0], (Float) args[1], (Document) args[2])
+            args -> new ResultItem((Integer) args[0], (Float) args[1], (String) args[2])
         );
 
         static {
@@ -133,34 +131,4 @@ private record ResultItem(int index, float relevanceScore, @Nullable Document do
             PARSER.declareStringOrNull(constructorArg(), new ParseField("input"));
         }
     }
-
-    private record Document(String text) {}
-
-    private static Document parseDocument(XContentParser parser) throws IOException {
-        var token = parser.currentToken();
-        if (token == XContentParser.Token.START_OBJECT) {
-            return new Document(DocumentObject.PARSER.apply(parser, null).text());
-        } else if (token == XContentParser.Token.VALUE_STRING) {
-            return new Document(parser.text());
-        } else if (token == XContentParser.Token.VALUE_NULL) {
-            return new Document(null);
-        }
-
-        throw new XContentParseException(
-            parser.getTokenLocation(),
-            "Expected an object, string or null for document field, but got: " + token
-        );
-    }
-
-    private record DocumentObject(String text) {
-        public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(
-            DocumentObject.class.getSimpleName(),
-            true,
-            args -> new DocumentObject((String) args[0])
-        );
-
-        static {
-            PARSER.declareString(constructorArg(), new ParseField("text"));
-        }
-    }
 }

From e0339fe178b8b7e8475bb7bc6b7255f0b89cbf2e Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Thu, 12 Feb 2026 17:25:35 +0100
Subject: [PATCH 45/48] Address the comment: drop redundant null check on item.document()

---
 .../mixedbread/response/MixedbreadRerankResponseEntity.java     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
index 7d5bd93797f3b..fde809e770b9d 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
@@ -110,7 +110,7 @@ public RankedDocsResults toRankedDocsResults() {
                     item -> new RankedDocsResults.RankedDoc(
                         item.index(),
                         item.relevanceScore(),
-                        item.document() != null ? item.document() : null
+                        item.document()
                     )
                 )
                 .toList();

From a50af0ab6c09ebee3537d56f7a7a5a7e65494c53 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine 
Date: Thu, 12 Feb 2026 16:33:36 +0000
Subject: [PATCH 46/48] [CI] Auto commit changes from spotless

---
 .../response/MixedbreadRerankResponseEntity.java          | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
index fde809e770b9d..3cd6be295c2ff 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
@@ -106,13 +106,7 @@ private record Response(List results) {
 
         public RankedDocsResults toRankedDocsResults() {
             List rankedDocs = results.stream()
-                .map(
-                    item -> new RankedDocsResults.RankedDoc(
-                        item.index(),
-                        item.relevanceScore(),
-                        item.document()
-                    )
-                )
+                .map(item -> new RankedDocsResults.RankedDoc(item.index(), item.relevanceScore(), item.document()))
                 .toList();
             return new RankedDocsResults(rankedDocs);
         }

From 5226cd2b5d10a05e9864464743dc8a09c0a8b9b6 Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Thu, 12 Feb 2026 18:07:18 +0100
Subject: [PATCH 47/48] Fix the test by making the 'input' field an optional constructor arg

---
 .../mixedbread/response/MixedbreadRerankResponseEntity.java    | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
index 3cd6be295c2ff..f125d37af53f2 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mixedbread/response/MixedbreadRerankResponseEntity.java
@@ -21,6 +21,7 @@
 import java.util.List;
 
 import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;
+import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
 
 public class MixedbreadRerankResponseEntity {
 
@@ -122,7 +123,7 @@ private record ResultItem(int index, float relevanceScore, @Nullable String docu
         static {
             PARSER.declareInt(constructorArg(), new ParseField("index"));
             PARSER.declareFloat(constructorArg(), new ParseField("score"));
-            PARSER.declareStringOrNull(constructorArg(), new ParseField("input"));
+            PARSER.declareStringOrNull(optionalConstructorArg(), new ParseField("input"));
         }
     }
 }

From 83292149fc2009abac69b1fb6f8ca42a4ab2b142 Mon Sep 17 00:00:00 2001
From: Evgenii Kazannik 
Date: Thu, 12 Feb 2026 18:27:31 +0100
Subject: [PATCH 48/48] Bump inference_mixedbread_added transport version to 9283000

---
 .../definitions/referable/inference_mixedbread_added.csv        | 2 +-
 server/src/main/resources/transport/upper_bounds/9.4.csv        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv b/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
index eb27c20e6c5e5..e270b67a7c44f 100644
--- a/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
+++ b/server/src/main/resources/transport/definitions/referable/inference_mixedbread_added.csv
@@ -1 +1 @@
-9282000
+9283000
diff --git a/server/src/main/resources/transport/upper_bounds/9.4.csv b/server/src/main/resources/transport/upper_bounds/9.4.csv
index d1b7b2100f561..074433fd98de2 100644
--- a/server/src/main/resources/transport/upper_bounds/9.4.csv
+++ b/server/src/main/resources/transport/upper_bounds/9.4.csv
@@ -1 +1 @@
-esql_batch_page,9282000
+inference_mixedbread_added,9283000