From c9c4d4725f58cff1133d5e625a022ecec921390c Mon Sep 17 00:00:00 2001 From: donalevans Date: Wed, 7 Jan 2026 17:01:32 -0800 Subject: [PATCH 1/5] Include rerank in supported tasks for IBM watsonx integration Closes #140328 --- .../xpack/inference/InferenceGetServicesIT.java | 1 + .../services/ibmwatsonx/IbmWatsonxService.java | 15 +++++++++++++-- .../ibmwatsonx/IbmWatsonxServiceTests.java | 16 +++++++++++----- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java index e356edec7d40c..69976cb5d6b82 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java @@ -149,6 +149,7 @@ public void testGetServicesWithRerankTaskType() throws IOException { "openshift_ai", "test_reranking_service", "voyageai", + "watsonxai", "hugging_face", "amazon_sagemaker", "elastic" diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java index 24273fac6b61b..8285b6dad4c76 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java @@ -24,6 +24,7 @@ import org.elasticsearch.inference.Model; import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.ModelSecrets; +import org.elasticsearch.inference.RerankingInferenceService; import org.elasticsearch.inference.SettingsConfiguration; import org.elasticsearch.inference.SimilarityMeasure; import org.elasticsearch.inference.TaskType; @@ -68,7 +69,7 @@ import static org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxServiceFields.EMBEDDING_MAX_BATCH_SIZE; import static org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxServiceFields.PROJECT_ID; -public class IbmWatsonxService extends SenderService { +public class IbmWatsonxService extends SenderService implements RerankingInferenceService { public static final String NAME = "watsonxai"; @@ -76,7 +77,8 @@ public class IbmWatsonxService extends SenderService { private static final EnumSet supportedTaskTypes = EnumSet.of( TaskType.TEXT_EMBEDDING, TaskType.COMPLETION, - TaskType.CHAT_COMPLETION + TaskType.CHAT_COMPLETION, + TaskType.RERANK ); private static final ResponseHandler UNIFIED_CHAT_COMPLETION_HANDLER = new IbmWatsonUnifiedChatCompletionResponseHandler( "IBM watsonx chat completions", @@ -362,6 +364,15 @@ protected IbmWatsonxActionCreator getActionCreator(Sender sender, ServiceCompone return new IbmWatsonxActionCreator(getSender(), getServiceComponents()); } + @Override + public int rerankerWindowSize(String modelId) { + // IBM watsonx has a single rerank model with a token limit of 512 + // (see https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/fm-models-embed.html?context=wx#reranker-overview) + // Using 1 token = 0.75 words as a rough estimate, we get 384 words + // allowing for some headroom, we set the window size below 384 words + return 350; + } + public static class Configuration { public static InferenceServiceConfiguration get() { return configuration.getOrCompute(); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java index 659e5c70c7677..39c75b0101af1 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java @@ -29,6 +29,7 @@ import org.elasticsearch.inference.InputType; import org.elasticsearch.inference.Model; import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.RerankingInferenceService; import org.elasticsearch.inference.SimilarityMeasure; import org.elasticsearch.inference.TaskType; import org.elasticsearch.test.http.MockResponse; @@ -943,7 +944,7 @@ public void testGetConfiguration() throws Exception { { "service": "watsonxai", "name": "IBM watsonx", - "task_types": ["text_embedding", "completion", "chat_completion"], + "task_types": ["text_embedding", "rerank", "completion", "chat_completion"], "configurations": { "project_id": { "description": "", @@ -952,7 +953,7 @@ public void testGetConfiguration() throws Exception { "sensitive": false, "updatable": false, "type": "str", - "supported_task_types": ["text_embedding", "completion", "chat_completion"] + "supported_task_types": ["text_embedding", "rerank", "completion", "chat_completion"] }, "model_id": { "description": "The name of the model to use for the inference task.", @@ -961,7 +962,7 @@ public void testGetConfiguration() throws Exception { "sensitive": false, "updatable": false, "type": "str", - "supported_task_types": ["text_embedding", "completion", "chat_completion"] + "supported_task_types": ["text_embedding", "rerank", "completion", "chat_completion"] }, "api_version": { "description": "The IBM watsonx API version ID to use.", @@ -970,7 +971,7 @@ public void testGetConfiguration() throws Exception { "sensitive": false, "updatable": false, "type": "str", - "supported_task_types": ["text_embedding", "completion", "chat_completion"] + "supported_task_types": ["text_embedding", "rerank", "completion", "chat_completion"] }, "max_input_tokens": { "description": "Allows you to specify the maximum number of tokens per input.", @@ -988,7 +989,7 @@ public void testGetConfiguration() throws Exception { "sensitive": false, "updatable": false, "type": "str", - "supported_task_types": ["text_embedding", "completion", "chat_completion"] + "supported_task_types": ["text_embedding", "rerank", "completion", "chat_completion"] } } } @@ -1050,6 +1051,11 @@ public InferenceService createInferenceService() { return createIbmWatsonxService(); } + @Override + protected void assertRerankerWindowSize(RerankingInferenceService rerankingInferenceService) { + assertThat(rerankingInferenceService.rerankerWindowSize("any model"), is(350)); + } + private static class IbmWatsonxServiceWithoutAuth extends IbmWatsonxService { IbmWatsonxServiceWithoutAuth(HttpRequestSender.Factory factory, ServiceComponents serviceComponents) { super(factory, serviceComponents, mockClusterServiceEmpty()); From 983fd5116878d293d274110c5f5b1cd9641fdb94 Mon Sep 17 00:00:00 2001 From: Donal Evans Date: Wed, 7 Jan 2026 17:06:29 -0800 Subject: [PATCH 2/5] Update docs/changelog/140331.yaml --- docs/changelog/140331.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 docs/changelog/140331.yaml diff --git a/docs/changelog/140331.yaml b/docs/changelog/140331.yaml new file mode 100644 index 0000000000000..19568786d098e --- /dev/null +++ b/docs/changelog/140331.yaml @@ -0,0 +1,6 @@ +pr: 140331 +summary: "[Inference API] Include rerank in supported tasks for IBM watsonx integration" +area: Inference +type: bug +issues: + - 140328 From 69abd0c12977850ad4dca6c0850130e7a65e3169 Mon Sep 17 00:00:00 2001 From: donalevans Date: Thu, 8 Jan 2026 08:02:40 -0800 Subject: [PATCH 3/5] Extract rerank window size to a constant --- .../services/ibmwatsonx/IbmWatsonxService.java | 16 +++++++++------- .../ibmwatsonx/IbmWatsonxServiceTests.java | 3 ++- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java index 8285b6dad4c76..a7ffe1c668651 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java @@ -71,8 +71,6 @@ public class IbmWatsonxService extends SenderService implements RerankingInferenceService { - public static final String NAME = "watsonxai"; - private static final String SERVICE_NAME = "IBM watsonx"; private static final EnumSet supportedTaskTypes = EnumSet.of( TaskType.TEXT_EMBEDDING, @@ -85,6 +83,14 @@ public class IbmWatsonxService extends SenderService implements RerankingInferen OpenAiChatCompletionResponseEntity::fromResponse ); + public static final String NAME = "watsonxai"; + + // IBM watsonx has a single rerank model with a token limit of 512 + // (see https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/fm-models-embed.html?context=wx#reranker-overview) + // Using 1 token = 0.75 words as a rough estimate, we get 384 words + // allowing for some headroom, we set the window size below 384 words + public static final int RERANK_WINDOW_SIZE = 350; + public IbmWatsonxService( HttpRequestSender.Factory factory, ServiceComponents serviceComponents, @@ -366,11 +372,7 @@ protected IbmWatsonxActionCreator getActionCreator(Sender sender, ServiceCompone @Override public int rerankerWindowSize(String modelId) { - // IBM watsonx has a single rerank model with a token limit of 512 - // (see https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/fm-models-embed.html?context=wx#reranker-overview) - // Using 1 token = 0.75 words as a rough estimate, we get 384 words - // allowing for some headroom, we set the window size below 384 words - return 350; + return RERANK_WINDOW_SIZE; } public static class Configuration { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java index 39c75b0101af1..792fed20970e7 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java @@ -83,6 +83,7 @@ import static org.elasticsearch.xpack.inference.services.SenderServiceTests.createMockSender; import static org.elasticsearch.xpack.inference.services.ServiceComponentsTests.createWithEmptySettings; import static org.elasticsearch.xpack.inference.services.cohere.embeddings.CohereEmbeddingsTaskSettingsTests.getTaskSettingsMapEmpty; +import static org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxService.RERANK_WINDOW_SIZE; import static org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettingsTests.getSecretSettingsMap; import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.CoreMatchers.is; @@ -1053,7 +1054,7 @@ public InferenceService createInferenceService() { @Override protected void assertRerankerWindowSize(RerankingInferenceService rerankingInferenceService) { - assertThat(rerankingInferenceService.rerankerWindowSize("any model"), is(350)); + assertThat(rerankingInferenceService.rerankerWindowSize("any model"), is(RERANK_WINDOW_SIZE)); } private static class IbmWatsonxServiceWithoutAuth extends IbmWatsonxService { From 6d56e6f5c0c5d0fc920ba7f5ad7e67927bec881a Mon Sep 17 00:00:00 2001 From: donalevans Date: Thu, 8 Jan 2026 09:58:41 -0800 Subject: [PATCH 4/5] Update REST API spec file --- .../resources/rest-api-spec/api/inference.put_watsonx.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put_watsonx.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put_watsonx.json index e9852eda3048e..cd7b3688ac883 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put_watsonx.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put_watsonx.json @@ -28,7 +28,8 @@ "options": [ "text_embedding", "chat_completion", - "completion" + "completion", + "rerank" ] }, "watsonx_inference_id": { From 48083aa978e4e3e5a1ca2ee6f4505178ee0f0724 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 8 Jan 2026 20:03:20 +0000 Subject: [PATCH 5/5] [CI] Auto commit changes from spotless --- .../index/mapper/MultiValuedBinaryDocValuesField.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MultiValuedBinaryDocValuesField.java b/server/src/main/java/org/elasticsearch/index/mapper/MultiValuedBinaryDocValuesField.java index 75b418e0c0082..d036b74fe94ba 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MultiValuedBinaryDocValuesField.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MultiValuedBinaryDocValuesField.java @@ -54,7 +54,7 @@ public int count() { protected void writeLenAndValues(BytesStreamOutput out) throws IOException { // sort the ArrayList variant of the collection prior to serializing it into a binary array if (values instanceof ArrayList list) { - list.sort(Comparator.naturalOrder()); + list.sort(Comparator.naturalOrder()); } for (BytesRef value : values) {