diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/README.md b/x-pack/packages/ai-infra/product-doc-artifact-builder/README.md index c545a82ff4389..0a4d8de5a204e 100644 --- a/x-pack/packages/ai-infra/product-doc-artifact-builder/README.md +++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/README.md @@ -53,7 +53,6 @@ Defaults to `{REPO_ROOT}/build/temp-kb-artifacts` `sourceClusterUrl` / env.KIBANA_SOURCE_CLUSTER_URL `sourceClusterUsername` / env.KIBANA_SOURCE_CLUSTER_USERNAME `sourceClusterPassword` / env.KIBANA_SOURCE_CLUSTER_PASSWORD - sourceClusterIndex / env.KIBANA_SOURCE_INDEX - params for the embedding cluster: `embeddingClusterUrl` / env.KIBANA_EMBEDDING_CLUSTER_URL diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/build_artifacts.ts b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/build_artifacts.ts index c83c13e48843c..371c72bd02aef 100644 --- a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/build_artifacts.ts +++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/build_artifacts.ts @@ -40,12 +40,6 @@ const getSourceClient = (config: TaskConfig) => { }, Connection: Elasticsearch8HttpConnection, requestTimeout: 30_000, - ssl: { - rejectUnauthorized: false, - }, - tls: { - rejectUnauthorized: false, - }, }); }; @@ -60,12 +54,6 @@ const getEmbeddingClient = (config: TaskConfig) => { // generating embeddings takes time requestTimeout: 10 * 60 * 1000, Connection: HttpConnection, - ssl: { - rejectUnauthorized: false, - }, - tls: { - rejectUnauthorized: false, - }, }); }; @@ -96,7 +84,6 @@ export const buildArtifacts = async (config: TaskConfig) => { buildFolder: config.buildFolder, targetFolder: config.targetFolder, sourceClient, - sourceClusterIndex: config.sourceClusterIndex, embeddingClient, log, inferenceId: config.inferenceId ?? defaultInferenceEndpoints.ELSER, @@ -115,7 +102,6 @@ const buildArtifact = async ({ sourceClient, log, inferenceId, - sourceClusterIndex = 'connector-prod-s3-doc-content-v1', }: { productName: ProductName; stackVersion: string; @@ -125,7 +111,6 @@ const buildArtifact = async ({ embeddingClient: Client; log: ToolingLog; inferenceId: string; - sourceClusterIndex?: string; }) => { log.info( `Starting building artifact for product [${productName}] and version [${stackVersion}] with inference id [${inferenceId}]` @@ -154,7 +139,7 @@ const buildArtifact = async ({ let documents = await extractDocumentation({ client: sourceClient, - index: sourceClusterIndex ?? 'connector-prod-s3-doc-content-v1', + index: 'search-docs-1', log, productName, stackVersion, diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/command.ts b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/command.ts index 7829aa962e26b..7e4ebda200f25 100644 --- a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/command.ts +++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/command.ts @@ -53,12 +53,6 @@ function options(y: yargs.Argv) { demandOption: true, default: process.env.KIBANA_SOURCE_CLUSTER_PASSWORD, }) - .option('sourceClusterIndex', { - describe: 'The source cluster index', - string: true, - demandOption: true, - default: process.env.KIBANA_SOURCE_INDEX, - }) .option('embeddingClusterUrl', { describe: 'The embedding cluster url', string: true, @@ -96,7 +90,6 @@ export function runScript() { sourceClusterUrl: argv.sourceClusterUrl!, sourceClusterUsername: argv.sourceClusterUsername!, sourceClusterPassword: argv.sourceClusterPassword!, - sourceClusterIndex: argv.sourceClusterIndex!, embeddingClusterUrl: argv.embeddingClusterUrl!, embeddingClusterUsername: argv.embeddingClusterUsername!, embeddingClusterPassword: argv.embeddingClusterPassword!, diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/extract_documentation.ts b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/extract_documentation.ts index e44672caa6231..5c9cec4609805 100644 --- a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/extract_documentation.ts +++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/extract_documentation.ts @@ -9,6 +9,7 @@ import type { Client as ElasticsearchClient8 } from 'elasticsearch-8.x'; import type { SearchHit } from '@elastic/elasticsearch/lib/api/types'; import type { ToolingLog } from '@kbn/tooling-log'; import type { ProductName } from '@kbn/product-doc-common'; +import { getSourceNamesFromProductName, getProductNameFromSource } from '../artifact/product_name'; /** the list of fields to import from the source cluster */ const fields = [ @@ -39,12 +40,12 @@ export interface ExtractedDocument { ai_tags: string[]; } -const convertHit = (hit: SearchHit, productName: ProductName): ExtractedDocument => { +const convertHit = (hit: SearchHit): ExtractedDocument => { const source = hit._source; return { content_title: source.content_title, content_body: source.content_body, - product_name: productName, + product_name: getProductNameFromSource(source.product_name), root_type: 'documentation', slug: source.slug, url: source.url, @@ -56,43 +57,11 @@ const convertHit = (hit: SearchHit, productName: ProductName): ExtractedDoc }; }; -const generateSearchCriteriaForProduct = (productName: ProductName) => { - if (productName.toLowerCase() === 'elasticsearch') { - return { - bool: { - minimum_should_match: 1, - should: [ - { - match_phrase: { - filename: '*solutions/search*', - }, - }, - { - wildcard: { - product_name: { - case_insensitive: true, - value: 'elasticsearch', - }, - }, - }, - ], - }, - }; - } - return { - wildcard: { - filename: { - value: `*${productName}*`, - case_insensitive: false, - }, - }, - }; -}; export const extractDocumentation = async ({ client, index, stackVersion, - productName: productNameParam, + productName, log, }: { client: ElasticsearchClient8; @@ -103,30 +72,20 @@ export const extractDocumentation = async ({ }) => { log.info(`Starting to extract documents from source cluster`); - const productName = productNameParam.toLowerCase(); - const query = { + const response = await client.search({ index, size: 10000, query: { bool: { must: [ - { - bool: { - should: [generateSearchCriteriaForProduct(productName)], - }, - }, - { - exists: { - field: 'ai_fields.ai_summary', - }, - }, + { terms: { product_name: getSourceNamesFromProductName(productName) } }, + { term: { version: stackVersion } }, + { exists: { field: 'ai_fields.ai_summary' } }, ], }, }, fields, - }; - - const response = await client.search(query); + }); const totalHits = typeof response.hits.total === 'number' @@ -141,5 +100,5 @@ export const extractDocumentation = async ({ `Finished extracting documents from source. ${response.hits.hits.length} documents were extracted` ); - return response.hits.hits.map((hit) => convertHit(hit, productName)); + return response.hits.hits.map(convertHit); };