diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/README.md b/x-pack/packages/ai-infra/product-doc-artifact-builder/README.md
index 0a4d8de5a204e..c545a82ff4389 100644
--- a/x-pack/packages/ai-infra/product-doc-artifact-builder/README.md
+++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/README.md
@@ -53,6 +53,7 @@ Defaults to `{REPO_ROOT}/build/temp-kb-artifacts`
   `sourceClusterUrl` / env.KIBANA_SOURCE_CLUSTER_URL
   `sourceClusterUsername` / env.KIBANA_SOURCE_CLUSTER_USERNAME
   `sourceClusterPassword` / env.KIBANA_SOURCE_CLUSTER_PASSWORD
+  `sourceClusterIndex` / env.KIBANA_SOURCE_INDEX
 
 - params for the embedding cluster:
   `embeddingClusterUrl` / env.KIBANA_EMBEDDING_CLUSTER_URL
diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/build_artifacts.ts b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/build_artifacts.ts
index 371c72bd02aef..c83c13e48843c 100644
--- a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/build_artifacts.ts
+++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/build_artifacts.ts
@@ -84,6 +84,7 @@ export const buildArtifacts = async (config: TaskConfig) => {
         buildFolder: config.buildFolder,
         targetFolder: config.targetFolder,
         sourceClient,
+        sourceClusterIndex: config.sourceClusterIndex,
         embeddingClient,
         log,
         inferenceId: config.inferenceId ?? defaultInferenceEndpoints.ELSER,
@@ -102,6 +103,7 @@ const buildArtifact = async ({
   sourceClient,
   log,
   inferenceId,
+  sourceClusterIndex = 'connector-prod-s3-doc-content-v1',
 }: {
   productName: ProductName;
   stackVersion: string;
@@ -111,6 +113,8 @@ const buildArtifact = async ({
   embeddingClient: Client;
   log: ToolingLog;
   inferenceId: string;
+  /** Index to read the documentation from; optional, see the default above. */
+  sourceClusterIndex?: string;
 }) => {
   log.info(
     `Starting building artifact for product [${productName}] and version [${stackVersion}] with inference id [${inferenceId}]`
@@ -139,7 +143,7 @@ const buildArtifact = async ({
 
   let documents = await extractDocumentation({
     client: sourceClient,
-    index: 'search-docs-1',
+    index: sourceClusterIndex,
     log,
     productName,
     stackVersion,
diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/command.ts b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/command.ts
index 7e4ebda200f25..7829aa962e26b 100644
--- a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/command.ts
+++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/command.ts
@@ -53,6 +53,12 @@ function options(y: yargs.Argv) {
       demandOption: true,
       default: process.env.KIBANA_SOURCE_CLUSTER_PASSWORD,
     })
+    .option('sourceClusterIndex', {
+      describe: 'The source cluster index',
+      string: true,
+      demandOption: true,
+      default: process.env.KIBANA_SOURCE_INDEX,
+    })
     .option('embeddingClusterUrl', {
       describe: 'The embedding cluster url',
       string: true,
@@ -90,6 +96,7 @@ export function runScript() {
         sourceClusterUrl: argv.sourceClusterUrl!,
         sourceClusterUsername: argv.sourceClusterUsername!,
         sourceClusterPassword: argv.sourceClusterPassword!,
+        sourceClusterIndex: argv.sourceClusterIndex!,
         embeddingClusterUrl: argv.embeddingClusterUrl!,
         embeddingClusterUsername: argv.embeddingClusterUsername!,
         embeddingClusterPassword: argv.embeddingClusterPassword!,
diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/extract_documentation.ts b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/extract_documentation.ts
index 5c9cec4609805..e44672caa6231 100644
--- a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/extract_documentation.ts
+++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/extract_documentation.ts
@@ -9,7 +9,6 @@ import type { Client as ElasticsearchClient8 } from 'elasticsearch-8.x';
 import type { SearchHit } from '@elastic/elasticsearch/lib/api/types';
 import type { ToolingLog } from '@kbn/tooling-log';
 import type { ProductName } from '@kbn/product-doc-common';
-import { getSourceNamesFromProductName, getProductNameFromSource } from '../artifact/product_name';
 
 /** the list of fields to import from the source cluster */
 const fields = [
@@ -40,12 +39,12 @@ export interface ExtractedDocument {
   ai_tags: string[];
 }
 
-const convertHit = (hit: SearchHit): ExtractedDocument => {
+const convertHit = (hit: SearchHit, productName: ProductName): ExtractedDocument => {
   const source = hit._source;
   return {
     content_title: source.content_title,
     content_body: source.content_body,
-    product_name: getProductNameFromSource(source.product_name),
+    product_name: productName,
     root_type: 'documentation',
     slug: source.slug,
     url: source.url,
@@ -57,11 +56,54 @@ const convertHit = (hit: SearchHit): ExtractedDocument => {
   };
 };
 
+/**
+ * Builds the query clause that selects the source documents belonging to `productName`.
+ *
+ * Elasticsearch content is matched either through its `product_name` field or through a
+ * `solutions/search` file path; every other product is matched by looking for the
+ * lower-cased product name inside the `filename` field.
+ */
+const generateSearchCriteriaForProduct = (productName: ProductName) => {
+  const normalizedName = productName.toLowerCase();
+  if (normalizedName === 'elasticsearch') {
+    return {
+      bool: {
+        minimum_should_match: 1,
+        should: [
+          {
+            // NOTE(review): `match_phrase` does not expand `*`; the asterisks are left to the
+            // field's analyzer. Confirm this against the `filename` mapping.
+            match_phrase: {
+              filename: '*solutions/search*',
+            },
+          },
+          {
+            wildcard: {
+              product_name: {
+                case_insensitive: true,
+                value: 'elasticsearch',
+              },
+            },
+          },
+        ],
+      },
+    };
+  }
+  return {
+    wildcard: {
+      filename: {
+        value: `*${normalizedName}*`,
+        case_insensitive: false,
+      },
+    },
+  };
+};
+
 export const extractDocumentation = async ({
   client,
   index,
   stackVersion,
   productName,
   log,
 }: {
   client: ElasticsearchClient8;
@@ -72,20 +114,23 @@ export const extractDocumentation = async ({
 }) => {
   log.info(`Starting to extract documents from source cluster`);
 
-  const response = await client.search({
+  // NOTE(review): `stackVersion` is no longer part of the query, so documents are not
+  // filtered by version any more. Confirm that the source index is unversioned.
+  const query = {
     index,
     size: 10000,
     query: {
       bool: {
         must: [
-          { terms: { product_name: getSourceNamesFromProductName(productName) } },
-          { term: { version: stackVersion } },
+          generateSearchCriteriaForProduct(productName),
           { exists: { field: 'ai_fields.ai_summary' } },
         ],
       },
     },
     fields,
-  });
+  };
+
+  const response = await client.search(query);
 
   const totalHits =
     typeof response.hits.total === 'number'
@@ -100,5 +145,5 @@ export const extractDocumentation = async ({
     `Finished extracting documents from source. ${response.hits.hits.length} documents were extracted`
   );
 
-  return response.hits.hits.map(convertHit);
+  return response.hits.hits.map((hit) => convertHit(hit, productName));
 };