diff --git a/explore-analyze/elastic-inference.md b/explore-analyze/elastic-inference.md
new file mode 100644
index 0000000000..debda95960
--- /dev/null
+++ b/explore-analyze/elastic-inference.md
@@ -0,0 +1,28 @@
+---
+applies_to:
+  stack: ga
+  serverless: ga
+navigation_title: Elastic Inference
+---
+
+# Elastic {{infer-cap}}
+
+There are several ways to perform {{infer}} in the {{stack}}. This page provides a brief overview of the different methods:
+
+* [Using EIS (Elastic Inference Service)](elastic-inference/eis.md)
+* [Using the {{infer}} API](elastic-inference/inference-api.md)
+* [Trained models deployed in your cluster](machine-learning/nlp/ml-nlp-overview.md)
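+
+For example, the following request uses the {{infer}} API to create a `text_embedding` endpoint backed by the `elasticsearch` service (the endpoint name `my-e5-model` is illustrative; refer to the pages above for the full set of services and settings):
+
+```console
+PUT _inference/text_embedding/my-e5-model
+{
+  "service": "elasticsearch",
+  "service_settings": {
+    "num_allocations": 1,
+    "num_threads": 1,
+    "model_id": ".multilingual-e5-small"
+  }
+}
+```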
diff --git a/explore-analyze/elastic-inference/eis.md b/explore-analyze/elastic-inference/eis.md
new file mode 100644
index 0000000000..9a28237447
--- /dev/null
+++ b/explore-analyze/elastic-inference/eis.md
@@ -0,0 +1,24 @@
+---
+applies_to:
+  stack: ga
+  serverless: ga
+navigation_title: Elastic Inference Service (EIS)
+---
+
+# Elastic {{infer-cap}} Service
+
+The Elastic {{infer-cap}} Service (EIS) provides {{infer}} as a managed service: you create {{infer}} endpoints with the `elastic` service, without downloading or deploying a model in your cluster.
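+
+For example, a request along the following lines creates a `chat_completion` endpoint that uses EIS (the endpoint and model names are illustrative; refer to the [EIS {{infer}} integration](inference-api/elastic-inference-service-eis.md) for the supported models and settings):
+
+```console
+PUT _inference/chat_completion/my-eis-chat
+{
+  "service": "elastic",
+  "service_settings": {
+    "model_id": "rainbow-sprinkles"
+  }
+}
+```
+
+The `chat_completion` task type only supports streaming, through the `_stream` API.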
diff --git a/solutions/search/inference-api.md b/explore-analyze/elastic-inference/inference-api.md
similarity index 100%
rename from solutions/search/inference-api.md
rename to explore-analyze/elastic-inference/inference-api.md
diff --git a/solutions/search/inference-api/alibabacloud-ai-search-inference-integration.md b/explore-analyze/elastic-inference/inference-api/alibabacloud-ai-search-inference-integration.md
similarity index 100%
rename from solutions/search/inference-api/alibabacloud-ai-search-inference-integration.md
rename to explore-analyze/elastic-inference/inference-api/alibabacloud-ai-search-inference-integration.md
diff --git a/solutions/search/inference-api/amazon-bedrock-inference-integration.md b/explore-analyze/elastic-inference/inference-api/amazon-bedrock-inference-integration.md
similarity index 100%
rename from solutions/search/inference-api/amazon-bedrock-inference-integration.md
rename to explore-analyze/elastic-inference/inference-api/amazon-bedrock-inference-integration.md
diff --git a/solutions/search/inference-api/anthropic-inference-integration.md b/explore-analyze/elastic-inference/inference-api/anthropic-inference-integration.md
similarity index 100%
rename from solutions/search/inference-api/anthropic-inference-integration.md
rename to explore-analyze/elastic-inference/inference-api/anthropic-inference-integration.md
diff --git a/solutions/search/inference-api/azure-ai-studio-inference-integration.md b/explore-analyze/elastic-inference/inference-api/azure-ai-studio-inference-integration.md
similarity index 100%
rename from solutions/search/inference-api/azure-ai-studio-inference-integration.md
rename to explore-analyze/elastic-inference/inference-api/azure-ai-studio-inference-integration.md
diff --git a/solutions/search/inference-api/azure-openai-inference-integration.md b/explore-analyze/elastic-inference/inference-api/azure-openai-inference-integration.md
similarity index 100%
rename from solutions/search/inference-api/azure-openai-inference-integration.md
rename to explore-analyze/elastic-inference/inference-api/azure-openai-inference-integration.md
diff --git a/solutions/search/inference-api/chat-completion-inference-api.md b/explore-analyze/elastic-inference/inference-api/chat-completion-inference-api.md
similarity index 100%
rename from solutions/search/inference-api/chat-completion-inference-api.md
rename to explore-analyze/elastic-inference/inference-api/chat-completion-inference-api.md
diff --git a/solutions/search/inference-api/cohere-inference-integration.md b/explore-analyze/elastic-inference/inference-api/cohere-inference-integration.md
similarity index 100%
rename from solutions/search/inference-api/cohere-inference-integration.md
rename to explore-analyze/elastic-inference/inference-api/cohere-inference-integration.md
diff --git a/solutions/search/inference-api/elastic-inference-service-eis.md b/explore-analyze/elastic-inference/inference-api/elastic-inference-service-eis.md
similarity index 96%
rename from solutions/search/inference-api/elastic-inference-service-eis.md
rename to explore-analyze/elastic-inference/inference-api/elastic-inference-service-eis.md
index 3eaf36f177..d6127e53f3 100644
--- a/solutions/search/inference-api/elastic-inference-service-eis.md
+++ b/explore-analyze/elastic-inference/inference-api/elastic-inference-service-eis.md
@@ -38,7 +38,21 @@ Creates an {{infer}} endpoint to perform an {{infer}} task with the `elastic` se
 
 ::::{note}
 The `chat_completion` task type only supports streaming and only through the `_stream` API.
-For more information on how to use the `chat_completion` task type, please refer to the [chat completion documentation](/solutions/search/inference-api/chat-completion-inference-api.md).
+For more information on how to use the `chat_completion` task type, refer to the [chat completion documentation](chat-completion-inference-api.md).
 ::::
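+
+For example, after you create a `chat_completion` endpoint backed by EIS, a streamed request might look like this (the endpoint name `my-eis-chat` is illustrative):
+
+```console
+POST _inference/chat_completion/my-eis-chat/_stream
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": "Say this is a test"
+    }
+  ]
+}
+```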
 
 
diff --git a/solutions/search/inference-api/elasticsearch-inference-integration.md b/explore-analyze/elastic-inference/inference-api/elasticsearch-inference-integration.md
similarity index 100%
rename from solutions/search/inference-api/elasticsearch-inference-integration.md
rename to explore-analyze/elastic-inference/inference-api/elasticsearch-inference-integration.md
diff --git a/solutions/search/inference-api/elser-inference-integration.md b/explore-analyze/elastic-inference/inference-api/elser-inference-integration.md
similarity index 100%
rename from solutions/search/inference-api/elser-inference-integration.md
rename to explore-analyze/elastic-inference/inference-api/elser-inference-integration.md
diff --git a/solutions/search/inference-api/google-ai-studio-inference-integration.md b/explore-analyze/elastic-inference/inference-api/google-ai-studio-inference-integration.md
similarity index 100%
rename from solutions/search/inference-api/google-ai-studio-inference-integration.md
rename to explore-analyze/elastic-inference/inference-api/google-ai-studio-inference-integration.md
diff --git a/solutions/search/inference-api/google-vertex-ai-inference-integration.md b/explore-analyze/elastic-inference/inference-api/google-vertex-ai-inference-integration.md
similarity index 100%
rename from solutions/search/inference-api/google-vertex-ai-inference-integration.md
rename to explore-analyze/elastic-inference/inference-api/google-vertex-ai-inference-integration.md
diff --git a/solutions/search/inference-api/huggingface-inference-integration.md b/explore-analyze/elastic-inference/inference-api/huggingface-inference-integration.md
similarity index 100%
rename from solutions/search/inference-api/huggingface-inference-integration.md
rename to explore-analyze/elastic-inference/inference-api/huggingface-inference-integration.md
diff --git a/solutions/search/inference-api/jinaai-inference-integration.md b/explore-analyze/elastic-inference/inference-api/jinaai-inference-integration.md
similarity index 100%
rename from solutions/search/inference-api/jinaai-inference-integration.md
rename to explore-analyze/elastic-inference/inference-api/jinaai-inference-integration.md
diff --git a/solutions/search/inference-api/mistral-inference-integration.md b/explore-analyze/elastic-inference/inference-api/mistral-inference-integration.md
similarity index 100%
rename from solutions/search/inference-api/mistral-inference-integration.md
rename to explore-analyze/elastic-inference/inference-api/mistral-inference-integration.md
diff --git a/solutions/search/inference-api/openai-inference-integration.md b/explore-analyze/elastic-inference/inference-api/openai-inference-integration.md
similarity index 100%
rename from solutions/search/inference-api/openai-inference-integration.md
rename to explore-analyze/elastic-inference/inference-api/openai-inference-integration.md
diff --git a/explore-analyze/machine-learning/nlp/ml-nlp-auto-scale.md b/explore-analyze/machine-learning/nlp/ml-nlp-auto-scale.md
index 4a9f3f86e5..ea90091f4a 100644
--- a/explore-analyze/machine-learning/nlp/ml-nlp-auto-scale.md
+++ b/explore-analyze/machine-learning/nlp/ml-nlp-auto-scale.md
@@ -27,7 +27,7 @@ When adaptive allocations are enabled, the number of allocations of the model is
 
 You can enable adaptive allocations by using:
 
-* the create inference endpoint API for [ELSER](../../../solutions/search/inference-api/elser-inference-integration.md), [E5 and models uploaded through Eland](../../../solutions/search/inference-api/elasticsearch-inference-integration.md) that are used as {{infer}} services.
+* the create inference endpoint API for [ELSER](../../elastic-inference/inference-api/elser-inference-integration.md), [E5 and models uploaded through Eland](../../elastic-inference/inference-api/elasticsearch-inference-integration.md) that are used as {{infer}} services.
 * the [start trained model deployment](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ml-start-trained-model-deployment) or [update trained model deployment](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ml-update-trained-model-deployment) APIs for trained models that are deployed on {{ml}} nodes.
 
 If the new allocations fit on the current {{ml}} nodes, they are immediately started. If more resource capacity is needed for creating new model allocations, then your {{ml}} node will be scaled up if {{ml}} autoscaling is enabled to provide enough resources for the new allocation. The number of model allocations can be scaled down to 0. They cannot be scaled up to more than 32 allocations, unless you explicitly set the maximum number of allocations to more. Adaptive allocations must be set up independently for each deployment and [{{infer}} endpoint](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put).
diff --git a/explore-analyze/machine-learning/nlp/ml-nlp-e5.md b/explore-analyze/machine-learning/nlp/ml-nlp-e5.md
index 7c7972e31c..98a4d28d73 100644
--- a/explore-analyze/machine-learning/nlp/ml-nlp-e5.md
+++ b/explore-analyze/machine-learning/nlp/ml-nlp-e5.md
@@ -13,7 +13,7 @@ EmbEddings from bidirEctional Encoder rEpresentations - or E5 - is a {{nlp}} mo
 
 [Semantic search](../../../solutions/search/semantic-search.md) provides you search results based on contextual meaning and user intent, rather than exact keyword matches.
 
-E5 has two versions: one cross-platform version which runs on any hardware and one version which is optimized for Intel® silicon. The **Model Management** > **Trained Models** page shows you which version of E5 is recommended to deploy based on your cluster’s hardware. However, the recommended way to use E5 is through the [{{infer}} API](../../../solutions/search/inference-api/elasticsearch-inference-integration.md) as a service which makes it easier to download and deploy the model and you don’t need to select from different versions.
+E5 has two versions: one cross-platform version which runs on any hardware and one version which is optimized for Intel® silicon. The **Model Management** > **Trained Models** page shows you which version of E5 is recommended to deploy based on your cluster’s hardware. However, the recommended way to use E5 is through the [{{infer}} API](../../elastic-inference/inference-api/elasticsearch-inference-integration.md) as a service, which makes it easier to download and deploy the model, and you don’t need to select from different versions.
 
 Refer to the model cards of the [multilingual-e5-small](https://huggingface.co/elastic/multilingual-e5-small) and the [multilingual-e5-small-optimized](https://huggingface.co/elastic/multilingual-e5-small-optimized) models on HuggingFace for further information including licensing.
 
@@ -44,7 +44,7 @@ PUT _inference/text_embedding/my-e5-model
 
 The API request automatically initiates the model download and then deploy the model.
 
-Refer to the [`elasticsearch` {{infer}} service documentation](../../../solutions/search/inference-api/elasticsearch-inference-integration.md) to learn more about the available settings.
+Refer to the [`elasticsearch` {{infer}} service documentation](../../elastic-inference/inference-api/elasticsearch-inference-integration.md) to learn more about the available settings.
 
 After you created the E5 {{infer}} endpoint, it’s ready to be used for semantic search. The easiest way to perform semantic search in the {{stack}} is to [follow the `semantic_text` workflow](../../../solutions/search/semantic-search/semantic-search-semantic-text.md).
diff --git a/explore-analyze/machine-learning/nlp/ml-nlp-elser.md b/explore-analyze/machine-learning/nlp/ml-nlp-elser.md
index 4d01564574..488679f0cb 100644
--- a/explore-analyze/machine-learning/nlp/ml-nlp-elser.md
+++ b/explore-analyze/machine-learning/nlp/ml-nlp-elser.md
@@ -39,7 +39,7 @@ Enabling trained model autoscaling for your ELSER deployment is recommended. Ref
 
 Compared to the initial version of the model, ELSER v2 offers improved retrieval accuracy and more efficient indexing. This enhancement is attributed to the extension of the training data set, which includes high-quality question and answer pairs and the improved FLOPS regularizer which reduces the cost of computing the similarity between a query and a document.
 
-ELSER v2 has two versions: one cross-platform version which runs on any hardware and one version which is optimized for Intel® silicon. The **Model Management** > **Trained Models** page shows you which version of ELSER v2 is recommended to deploy based on your cluster’s hardware. However, the recommended way to use ELSER is through the [{{infer}} API](../../../solutions/search/inference-api/elser-inference-integration.md) as a service which makes it easier to download and deploy the model and you don’t need to select from different versions.
+ELSER v2 has two versions: one cross-platform version which runs on any hardware and one version which is optimized for Intel® silicon. The **Model Management** > **Trained Models** page shows you which version of ELSER v2 is recommended to deploy based on your cluster’s hardware. However, the recommended way to use ELSER is through the [{{infer}} API](../../elastic-inference/inference-api/elser-inference-integration.md) as a service, which makes it easier to download and deploy the model, and you don’t need to select from different versions.
 
 If you want to learn more about the ELSER V2 improvements, refer to [this blog post](https://www.elastic.co/search-labs/blog/introducing-elser-v2-part-1).
 
@@ -74,7 +74,7 @@ PUT _inference/sparse_embedding/my-elser-model
 
 The API request automatically initiates the model download and then deploy the model. This example uses [autoscaling](ml-nlp-auto-scale.md) through adaptive allocation.
 
-Refer to the [ELSER {{infer}} integration documentation](../../../solutions/search/inference-api/elser-inference-integration.md) to learn more about the available settings.
+Refer to the [ELSER {{infer}} integration documentation](../../elastic-inference/inference-api/elser-inference-integration.md) to learn more about the available settings.
 
 After you created the ELSER {{infer}} endpoint, it’s ready to be used for semantic search. The easiest way to perform semantic search in the {{stack}} is to [follow the `semantic_text` workflow](../../../solutions/search/semantic-search/semantic-search-semantic-text.md).
@@ -306,7 +306,7 @@ To gain the biggest value out of ELSER trained models, consider to follow this l
 ## Benchmark information [elser-benchmarks]
 
 ::::{important}
-The recommended way to use ELSER is through the [{{infer}} API](../../../solutions/search/inference-api/elser-inference-integration.md) as a service.
+The recommended way to use ELSER is through the [{{infer}} API](../../elastic-inference/inference-api/elser-inference-integration.md) as a service.
 ::::
 
 The following sections provide information about how ELSER performs on different hardwares and compares the model performance to {{es}} BM25 and other strong baselines.
diff --git a/explore-analyze/machine-learning/nlp/ml-nlp-overview.md b/explore-analyze/machine-learning/nlp/ml-nlp-overview.md
index cc3f42f4bf..a43acfc173 100644
--- a/explore-analyze/machine-learning/nlp/ml-nlp-overview.md
+++ b/explore-analyze/machine-learning/nlp/ml-nlp-overview.md
@@ -16,7 +16,7 @@ Elastic offers a wide range of possibilities to leverage natural language proces
 
 You can **integrate NLP models from different providers** such as Cohere, HuggingFace, or OpenAI and use them as a service through the [semantic_text](../../../solutions/search/semantic-search/semantic-search-semantic-text.md) workflow. You can also use [ELSER](ml-nlp-elser.md) (the retrieval model trained by Elastic) and [E5](ml-nlp-e5.md) in the same way.
 
-The [{{infer}} API](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-inference) enables you to use the same services with a more complex workflow, for greater control over your configurations settings. This [tutorial](../../../solutions/search/inference-api.md) walks you through the process of using the various services with the {{infer}} API.
+The [{{infer}} API](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-inference) enables you to use the same services with a more complex workflow, for greater control over your configuration settings. This [tutorial](../../elastic-inference/inference-api.md) walks you through the process of using the various services with the {{infer}} API.
 
 You can **upload and manage NLP models** using the Eland client and the [{{stack}}](ml-nlp-deploy-models.md). Find the [list of recommended and compatible models here](ml-nlp-model-ref.md). Refer to [*Examples*](ml-nlp-examples.md) to learn more about how to use {{ml}} models deployed in your cluster.
diff --git a/explore-analyze/machine-learning/nlp/ml-nlp-rerank.md b/explore-analyze/machine-learning/nlp/ml-nlp-rerank.md
index 043873cbfe..f53c75bafa 100644
--- a/explore-analyze/machine-learning/nlp/ml-nlp-rerank.md
+++ b/explore-analyze/machine-learning/nlp/ml-nlp-rerank.md
@@ -44,7 +44,7 @@ Elastic Rerank is available in Elastic Stack version 8.17+:
 
 ## Download and deploy [ml-nlp-rerank-deploy]
 
-To download and deploy Elastic Rerank, use the [create inference API](../../../solutions/search/inference-api/elasticsearch-inference-integration.md) to create an {{es}} service `rerank` endpoint.
+To download and deploy Elastic Rerank, use the [create inference API](../../elastic-inference/inference-api/elasticsearch-inference-integration.md) to create an {{es}} service `rerank` endpoint.
 
 ::::{tip}
 Refer to this [Python notebook](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/12-semantic-reranking-elastic-rerank.ipynb) for an end-to-end example using Elastic Rerank.
@@ -280,7 +280,7 @@ For detailed benchmark information, including complete dataset results and metho
 **Documentation**:
 
 * [Semantic re-ranking in {{es}} overview](../../../solutions/search/ranking/semantic-reranking.md#semantic-reranking-in-es)
-* [Inference API example](../../../solutions/search/inference-api/elasticsearch-inference-integration.md#inference-example-elastic-reranker)
+* [Inference API example](../../elastic-inference/inference-api/elasticsearch-inference-integration.md#inference-example-elastic-reranker)
 
 **Blogs**:
diff --git a/explore-analyze/toc.yml b/explore-analyze/toc.yml
index 5192ceb769..038d3480de 100644
--- a/explore-analyze/toc.yml
+++ b/explore-analyze/toc.yml
@@ -116,6 +116,27 @@ toc:
       - file: transforms/transform-examples.md
       - file: transforms/transform-painless-examples.md
       - file: transforms/transform-limitations.md
+  - file: elastic-inference.md
+    children:
+      - file: elastic-inference/eis.md
+      - file: elastic-inference/inference-api.md
+        children:
+          - file: elastic-inference/inference-api/elastic-inference-service-eis.md
+          - file: elastic-inference/inference-api/alibabacloud-ai-search-inference-integration.md
+          - file: elastic-inference/inference-api/amazon-bedrock-inference-integration.md
+          - file: elastic-inference/inference-api/anthropic-inference-integration.md
+          - file: elastic-inference/inference-api/azure-ai-studio-inference-integration.md
+          - file: elastic-inference/inference-api/azure-openai-inference-integration.md
+          - file: elastic-inference/inference-api/chat-completion-inference-api.md
+          - file: elastic-inference/inference-api/cohere-inference-integration.md
+          - file: elastic-inference/inference-api/elasticsearch-inference-integration.md
+          - file: elastic-inference/inference-api/elser-inference-integration.md
+          - file: elastic-inference/inference-api/google-ai-studio-inference-integration.md
+          - file: elastic-inference/inference-api/google-vertex-ai-inference-integration.md
+          - file: elastic-inference/inference-api/huggingface-inference-integration.md
+          - file: elastic-inference/inference-api/jinaai-inference-integration.md
+          - file: elastic-inference/inference-api/mistral-inference-integration.md
+          - file: elastic-inference/inference-api/openai-inference-integration.md
   - file: machine-learning.md
     children:
       - file: machine-learning/setting-up-machine-learning.md
diff --git a/raw-migrated-files/docs-content/serverless/general-ml-nlp-auto-scale.md b/raw-migrated-files/docs-content/serverless/general-ml-nlp-auto-scale.md
index e0f4f363f9..466bed89d9 100644
--- a/raw-migrated-files/docs-content/serverless/general-ml-nlp-auto-scale.md
+++ b/raw-migrated-files/docs-content/serverless/general-ml-nlp-auto-scale.md
@@ -28,7 +28,7 @@ If you set the minimum number of allocations to 1, you will be charged even if t
 
 You can enable adaptive allocations by using:
 
-* the create inference endpoint API for [ELSER](../../../solutions/search/inference-api/elser-inference-integration.md), [E5 and models uploaded through Eland](../../../solutions/search/inference-api/elasticsearch-inference-integration.md) that are used as inference services.
+* the create inference endpoint API for [ELSER](../../../explore-analyze/elastic-inference/inference-api/elser-inference-integration.md), [E5 and models uploaded through Eland](../../../explore-analyze/elastic-inference/inference-api/elasticsearch-inference-integration.md) that are used as inference services.
 * the [start trained model deployment](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ml-start-trained-model-deployment) or [update trained model deployment](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ml-update-trained-model-deployment) APIs for trained models that are deployed on machine learning nodes.
 
 If the new allocations fit on the current machine learning nodes, they are immediately started. If more resource capacity is needed for creating new model allocations, then your machine learning node will be scaled up if machine learning autoscaling is enabled to provide enough resources for the new allocation. The number of model allocations can be scaled down to 0. They cannot be scaled up to more than 32 allocations, unless you explicitly set the maximum number of allocations to more. Adaptive allocations must be set up independently for each deployment and [inference endpoint](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put).
diff --git a/raw-migrated-files/elasticsearch/elasticsearch-reference/semantic-search-inference.md b/raw-migrated-files/elasticsearch/elasticsearch-reference/semantic-search-inference.md
index 1410a4d586..e89ccbbb8d 100644
--- a/raw-migrated-files/elasticsearch/elasticsearch-reference/semantic-search-inference.md
+++ b/raw-migrated-files/elasticsearch/elasticsearch-reference/semantic-search-inference.md
@@ -23,7 +23,7 @@ The following examples use the:
 
 * `amazon.titan-embed-text-v1` model for [Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.md)
 * `ops-text-embedding-zh-001` model for [AlibabaCloud AI](https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-embedding-api-details)
 
-You can use any Cohere and OpenAI models, they are all supported by the {{infer}} API. For a list of recommended models available on HuggingFace, refer to [the supported model list](../../../solutions/search/inference-api/huggingface-inference-integration.md#inference-example-hugging-face-supported-models).
+You can use any Cohere and OpenAI models; they are all supported by the {{infer}} API. For a list of recommended models available on HuggingFace, refer to [the supported model list](../../../explore-analyze/elastic-inference/inference-api/huggingface-inference-integration.md#inference-example-hugging-face-supported-models).
 
 Click the name of the service you want to use on any of the widgets below to review the corresponding instructions.
diff --git a/solutions/search/hybrid-semantic-text.md b/solutions/search/hybrid-semantic-text.md
index 79fd2be5d1..ab2de53a4e 100644
--- a/solutions/search/hybrid-semantic-text.md
+++ b/solutions/search/hybrid-semantic-text.md
@@ -14,7 +14,7 @@ This tutorial demonstrates how to perform hybrid search, combining semantic sear
 
 In hybrid search, semantic search retrieves results based on the meaning of the text, while full-text search focuses on exact word matches. By combining both methods, hybrid search delivers more relevant results, particularly in cases where relying on a single approach may not be sufficient.
 
-The recommended way to use hybrid search in the {{stack}} is following the `semantic_text` workflow. This tutorial uses the [`elasticsearch` service](inference-api/elasticsearch-inference-integration.md) for demonstration, but you can use any service and their supported models offered by the {{infer-cap}} API.
+The recommended way to use hybrid search in the {{stack}} is to follow the `semantic_text` workflow. This tutorial uses the [`elasticsearch` service](../../explore-analyze/elastic-inference/inference-api/elasticsearch-inference-integration.md) for demonstration, but you can use any service and their supported models offered by the {{infer-cap}} API.
 
 ## Create an index mapping [hybrid-search-create-index-mapping]
diff --git a/solutions/search/inference-api/watsonx-inference-integration.md b/solutions/search/inference-api/watsonx-inference-integration.md
deleted file mode 100644
index 2b5c745fff..0000000000
--- a/solutions/search/inference-api/watsonx-inference-integration.md
+++ /dev/null
@@ -1,149 +0,0 @@
----
-navigation_title: "Watsonx"
-mapped_pages:
-  - https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-watsonx-ai.html
-applies_to:
-  stack:
-  serverless:
----
-
-# Watsonx inference integration [infer-service-watsonx-ai]
-
-:::{tip} Inference API reference
-Refer to the [{{infer-cap}} APIs](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-inference) for further information.
-:::
-
-Creates an {{infer}} endpoint to perform an {{infer}} task with the `watsonxai` service.
-
-You need an [IBM Cloud® Databases for Elasticsearch deployment](https://cloud.ibm.com/docs/databases-for-elasticsearch?topic=databases-for-elasticsearch-provisioning&interface=api) to use the `watsonxai` {{infer}} service. You can provision one through the [IBM catalog](https://cloud.ibm.com/databases/databases-for-elasticsearch/create), the [Cloud Databases CLI plug-in](https://cloud.ibm.com/docs/databases-cli-plugin?topic=databases-cli-plugin-cdb-reference), the [Cloud Databases API](https://cloud.ibm.com/apidocs/cloud-databases-api), or [Terraform](https://registry.terraform.io/providers/IBM-Cloud/ibm/latest/docs/resources/database).
-
-
-## {{api-request-title}} [infer-service-watsonx-ai-api-request]
-
-`PUT /_inference/<task_type>/<inference_id>`
-
-
-## {{api-path-parms-title}} [infer-service-watsonx-ai-api-path-params]
-
-`<inference_id>`
-: (Required, string) The unique identifier of the {{infer}} endpoint.
-
-`<task_type>`
-: (Required, string) The type of the {{infer}} task that the model will perform.
-
-    Available task types:
-
-    * `text_embedding`,
-    * `rerank`.
-
-## {{api-request-body-title}} [infer-service-watsonx-ai-api-request-body]
-
-`service`
-: (Required, string) The type of service supported for the specified task type. In this case, `watsonxai`.
-
-`service_settings`
-: (Required, object) Settings used to install the {{infer}} model.
-
-    These settings are specific to the `watsonxai` service.
-
-    `api_key`
-    : (Required, string) A valid API key of your Watsonx account. You can find your Watsonx API keys or you can create a new one [on the API keys page](https://cloud.ibm.com/iam/apikeys).
-
-    ::::{important}
-    You need to provide the API key only once, during the {{infer}} model creation. The [Get {{infer}} API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-get) does not retrieve your API key. After creating the {{infer}} model, you cannot change the associated API key. If you want to use a different API key, delete the {{infer}} model and recreate it with the same name and the updated API key.
-    ::::
-
-
-    `api_version`
-    : (Required, string) Version parameter that takes a version date in the format of `YYYY-MM-DD`. For the active version data parameters, refer to the [documentation](https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates).
-
-    `model_id`
-    : (Required, string) The name of the model to use for the {{infer}} task. Refer to the IBM Embedding Models section in the [Watsonx documentation](https://www.ibm.com/products/watsonx-ai/foundation-models) for the list of available text embedding models.
-
-    `url`
-    : (Required, string) The URL endpoint to use for the requests.
-
-    `project_id`
-    : (Required, string) The name of the project to use for the {{infer}} task.
-
-    `rate_limit`
-    : (Optional, object) By default, the `watsonxai` service sets the number of requests allowed per minute to `120`. This helps to minimize the number of rate limit errors returned from Watsonx. To modify this, set the `requests_per_minute` setting of this object in your service settings:
-
-```json
-"rate_limit": {
-    "requests_per_minute": <number_of_requests>
-}
-```
-
-`task_settings`
-: (Optional, object) Settings to configure the inference task.
-
-    These settings are specific to the `<task_type>` you specified.
-
-::::{dropdown} `task_settings` for the `rerank` task type
-`truncate_input_tokens`
-: (Optional, integer) Specifies the maximum number of tokens per input document before truncation.
-
-`return_documents`
-: (Optional, boolean) Specify whether to return doc text within the results.
-
-`top_n`
-: (Optional, integer) The number of most relevant documents to return. Defaults to the number of input documents.
-
-::::
-
-## Watsonx AI service example [inference-example-watsonx-ai]
-
-The following example shows how to create an {{infer}} endpoint called `watsonx-embeddings` to perform a `text_embedding` task type.
-
-```console
-PUT _inference/text_embedding/watsonx-embeddings
-{
-  "service": "watsonxai",
-  "service_settings": {
-      "api_key": "<api_key>", <1>
-      "url": "<url>", <2>
-      "model_id": "ibm/slate-30m-english-rtrvr",
-      "project_id": "<project_id>", <3>
-      "api_version": "2024-03-14" <4>
-  }
-}
-```
-
-1. A valid Watsonx API key. You can find on the [API keys page of your account](https://cloud.ibm.com/iam/apikeys).
-2. The {{infer}} endpoint URL you created on Watsonx.
-3. The ID of your IBM Cloud project.
-4. A valid API version parameter. You can find the active version data parameters [here](https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates).
-
-The following example shows how to create an inference endpoint called `watsonx-rerank` to perform a `rerank` task type.
-
-```console
-
-PUT _inference/rerank/watsonx-rerank
-{
-  "service": "watsonxai",
-  "service_settings": {
-      "api_key": "<api_key>", <1>
-      "url": "<url>", <2>
-      "model_id": "cross-encoder/ms-marco-minilm-l-12-v2",
-      "project_id": "<project_id>", <3>
-      "api_version": "2024-05-02" <4>
-  },
-  "task_settings": {
-    "truncate_input_tokens": 50, <5>
-    "return_documents": true, <6>
-    "top_n": 3 <7>
-  }
-}
-```
-
-1. A valid Watsonx API key. You can find on the [API keys page of your account](https://cloud.ibm.com/iam/apikeys).
-2. The {{infer}} endpoint URL you created on Watsonx.
-3. The ID of your IBM Cloud project.
-4. A valid API version parameter. You can find the active version data parameters [here](https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates).
-5. The maximum number of tokens per document before truncation.
-6. Whether to return the document text in the results.
-7. The number of top relevant documents to return.
-
-
-
diff --git a/solutions/search/ranking/semantic-reranking.md b/solutions/search/ranking/semantic-reranking.md
index 133846cb85..0a48f51d5f 100644
--- a/solutions/search/ranking/semantic-reranking.md
+++ b/solutions/search/ranking/semantic-reranking.md
@@ -92,10 +92,10 @@ To use semantic re-ranking in {{es}}, you need to:
 
 1. **Select and configure a re-ranking model**. You have the following options:
 
-    1. Use the [Elastic Rerank](../inference-api/elasticsearch-inference-integration.md#inference-example-elastic-reranker) cross-encoder model via the inference API’s {{es}} service.
-    2. Use the [Cohere Rerank inference endpoint](../inference-api/cohere-inference-integration.md) to create a `rerank` endpoint.
-    3. Use the [Google Vertex AI inference endpoint](../inference-api/google-vertex-ai-inference-integration.md) to create a `rerank` endpoint.
-    4. Upload a model to {{es}} from Hugging Face with [Eland](asciidocalypse://docs/eland/docs/reference/machine-learning.md#ml-nlp-pytorch). You’ll need to use the `text_similarity` NLP task type when loading the model using Eland. Then set up an [{{es}} service inference endpoint](../inference-api/elasticsearch-inference-integration.md#inference-example-eland) with the `rerank` endpoint type.
+    1. Use the [Elastic Rerank](../../../explore-analyze/elastic-inference/inference-api/elasticsearch-inference-integration.md#inference-example-elastic-reranker) cross-encoder model via the inference API’s {{es}} service.
+    2. Use the [Cohere Rerank inference endpoint](../../../explore-analyze/elastic-inference/inference-api/cohere-inference-integration.md) to create a `rerank` endpoint.
+    3. Use the [Google Vertex AI inference endpoint](../../../explore-analyze/elastic-inference/inference-api/google-vertex-ai-inference-integration.md) to create a `rerank` endpoint.
+    4. Upload a model to {{es}} from Hugging Face with [Eland](asciidocalypse://docs/eland/docs/reference/machine-learning.md#ml-nlp-pytorch). You’ll need to use the `text_similarity` NLP task type when loading the model using Eland. Then set up an [{{es}} service inference endpoint](../../../explore-analyze/elastic-inference/inference-api/elasticsearch-inference-integration.md#inference-example-eland) with the `rerank` endpoint type.
 
     Refer to [the Elastic NLP model reference](../../../explore-analyze/machine-learning/nlp/ml-nlp-model-ref.md#ml-nlp-model-ref-text-similarity) for a list of third party text similarity models supported by {{es}} for semantic re-ranking.
diff --git a/solutions/search/semantic-search.md b/solutions/search/semantic-search.md
index fbf5b66b14..97818b1e64 100644
--- a/solutions/search/semantic-search.md
+++ b/solutions/search/semantic-search.md
@@ -44,7 +44,7 @@ For an end-to-end tutorial, refer to [Semantic search with `semantic_text`](sema
 
 The {{infer}} API workflow is more complex but offers greater control over the {{infer}} endpoint configuration. You need to create an {{infer}} endpoint, provide various model-related settings and parameters, define an index mapping, and set up an {{infer}} ingest pipeline with the appropriate settings.
 
-For an end-to-end tutorial, refer to [Semantic search with the {{infer}} API](inference-api.md).
+For an end-to-end tutorial, refer to [Semantic search with the {{infer}} API](../../explore-analyze/elastic-inference/inference-api.md).
 
 
 ### Option 3: Manual model deployment [_model_deployment_workflow]
diff --git a/solutions/search/semantic-search/semantic-search-inference.md b/solutions/search/semantic-search/semantic-search-inference.md
index d97ada57cc..587b500361 100644
--- a/solutions/search/semantic-search/semantic-search-inference.md
+++ b/solutions/search/semantic-search/semantic-search-inference.md
@@ -28,7 +28,7 @@ The following examples use the:
 
 * `amazon.titan-embed-text-v1` model for [Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html)
 * `ops-text-embedding-zh-001` model for [AlibabaCloud AI](https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-embedding-api-details)
 
-You can use any Cohere and OpenAI models, they are all supported by the {{infer}} API. For a list of recommended models available on HuggingFace, refer to [the supported model list](../inference-api/huggingface-inference-integration.md#inference-example-hugging-face-supported-models).
+You can use any Cohere and OpenAI models; they are all supported by the {{infer}} API. For a list of recommended models available on HuggingFace, refer to [the supported model list](../../../explore-analyze/elastic-inference/inference-api/huggingface-inference-integration.md#inference-example-hugging-face-supported-models).
 
 Click the name of the service you want to use on any of the widgets below to review the corresponding instructions.
diff --git a/solutions/search/semantic-search/semantic-search-semantic-text.md b/solutions/search/semantic-search/semantic-search-semantic-text.md
index 7f0f4c49a5..7356f31075 100644
--- a/solutions/search/semantic-search/semantic-search-semantic-text.md
+++ b/solutions/search/semantic-search/semantic-search-semantic-text.md
@@ -19,14 +19,14 @@ This tutorial shows you how to use the semantic text feature to perform semantic
 
 Semantic text simplifies the {{infer}} workflow by providing {{infer}} at ingestion time and sensible default values automatically. You don’t need to define model related settings and parameters, or create {{infer}} ingest pipelines.
 
-The recommended way to use [semantic search](../semantic-search.md) in the {{stack}} is following the `semantic_text` workflow. When you need more control over indexing and query settings, you can still use the complete {{infer}} workflow (refer to [this tutorial](../inference-api.md) to review the process).
+The recommended way to use [semantic search](../semantic-search.md) in the {{stack}} is to follow the `semantic_text` workflow. When you need more control over indexing and query settings, you can still use the complete {{infer}} workflow (refer to [this tutorial](../../../explore-analyze/elastic-inference/inference-api.md) to review the process).
 
-This tutorial uses the [`elasticsearch` service](../inference-api/elasticsearch-inference-integration.md) for demonstration, but you can use any service and their supported models offered by the {{infer-cap}} API.
+This tutorial uses the [`elasticsearch` service](../../../explore-analyze/elastic-inference/inference-api/elasticsearch-inference-integration.md) for demonstration, but you can use any service and their supported models offered by the {{infer-cap}} API.
 
 
 ## Requirements [semantic-text-requirements]
 
-This tutorial uses the [`elasticsearch` service](../inference-api/elasticsearch-inference-integration.md) for demonstration, which is created automatically as needed. To use the `semantic_text` field type with an {{infer}} service other than `elasticsearch` service, you must create an inference endpoint using the [Create {{infer}} API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put).
+This tutorial uses the [`elasticsearch` service](../../../explore-analyze/elastic-inference/inference-api/elasticsearch-inference-integration.md) for demonstration, which is created automatically as needed. To use the `semantic_text` field type with an {{infer}} service other than the `elasticsearch` service, you must create an inference endpoint using the [Create {{infer}} API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put).
 
 
 ## Create the index mapping [semantic-text-index-mapping]
@@ -47,7 +47,7 @@ PUT semantic-embeddings
 ```
 
 1. The name of the field to contain the generated embeddings.
-2. The field to contain the embeddings is a `semantic_text` field. Since no `inference_id` is provided, the default endpoint `.elser-2-elasticsearch` for the [`elasticsearch` service](../inference-api/elasticsearch-inference-integration.md) is used. To use a different {{infer}} service, you must create an {{infer}} endpoint first using the [Create {{infer}} API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put) and then specify it in the `semantic_text` field mapping using the `inference_id` parameter.
+2. The field to contain the embeddings is a `semantic_text` field. Since no `inference_id` is provided, the default endpoint `.elser-2-elasticsearch` for the [`elasticsearch` service](../../../explore-analyze/elastic-inference/inference-api/elasticsearch-inference-integration.md) is used. To use a different {{infer}} service, you must create an {{infer}} endpoint first using the [Create {{infer}} API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put) and then specify it in the `semantic_text` field mapping using the `inference_id` parameter.
 
 ::::{note}
diff --git a/solutions/toc.yml b/solutions/toc.yml
index 81372e1471..ed9f6052f3 100644
--- a/solutions/toc.yml
+++ b/solutions/toc.yml
@@ -42,25 +42,6 @@ toc:
       - file: search/semantic-search/semantic-search-inference.md
       - file: search/semantic-search/semantic-search-elser-ingest-pipelines.md
       - file: search/semantic-search/cohere-es.md
-      - file: search/inference-api.md
-        children:
-          - file: search/inference-api/elastic-inference-service-eis.md
-          - file: search/inference-api/alibabacloud-ai-search-inference-integration.md
-          - file: search/inference-api/amazon-bedrock-inference-integration.md
-          - file: search/inference-api/anthropic-inference-integration.md
-          - file: search/inference-api/azure-ai-studio-inference-integration.md
-          - file: search/inference-api/azure-openai-inference-integration.md
-          - file: search/inference-api/chat-completion-inference-api.md
-          - file: search/inference-api/cohere-inference-integration.md
-          - file: search/inference-api/elasticsearch-inference-integration.md
-          - file: search/inference-api/elser-inference-integration.md
-          - file: search/inference-api/google-ai-studio-inference-integration.md
-          - file: search/inference-api/google-vertex-ai-inference-integration.md
-          - file: search/inference-api/huggingface-inference-integration.md
-          - file: search/inference-api/jinaai-inference-integration.md
-          - file: search/inference-api/mistral-inference-integration.md
-          - file: search/inference-api/openai-inference-integration.md
-          - file: search/inference-api/watsonx-inference-integration.md
       - file: search/rag.md
        children:
          - file: search/rag/playground.md