From 915d4cd4a28cb6721b67b49096aedef4269e512d Mon Sep 17 00:00:00 2001 From: "Quynh Nguyen (Quinn)" <43350163+qn895@users.noreply.github.com> Date: Fri, 23 Jan 2026 18:22:01 -0600 Subject: [PATCH] =?UTF-8?q?Revert=20"[AI=20Infra]=20Update=20scripts=20for?= =?UTF-8?q?=20extracting=20product=20documentation=20artifa=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 926b32b7c150f68856efedacd3836609bd93473b. --- .../product-doc-artifact-builder/README.md | 1 - .../src/build_artifacts.ts | 17 +----- .../src/command.ts | 7 --- .../src/tasks/extract_documentation.ts | 61 +++---------------- 4 files changed, 11 insertions(+), 75 deletions(-) diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/README.md b/x-pack/packages/ai-infra/product-doc-artifact-builder/README.md index c545a82ff4389..0a4d8de5a204e 100644 --- a/x-pack/packages/ai-infra/product-doc-artifact-builder/README.md +++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/README.md @@ -53,7 +53,6 @@ Defaults to `{REPO_ROOT}/build/temp-kb-artifacts` `sourceClusterUrl` / env.KIBANA_SOURCE_CLUSTER_URL `sourceClusterUsername` / env.KIBANA_SOURCE_CLUSTER_USERNAME `sourceClusterPassword` / env.KIBANA_SOURCE_CLUSTER_PASSWORD - sourceClusterIndex / env.KIBANA_SOURCE_INDEX - params for the embedding cluster: `embeddingClusterUrl` / env.KIBANA_EMBEDDING_CLUSTER_URL diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/build_artifacts.ts b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/build_artifacts.ts index c83c13e48843c..371c72bd02aef 100644 --- a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/build_artifacts.ts +++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/build_artifacts.ts @@ -40,12 +40,6 @@ const getSourceClient = (config: TaskConfig) => { }, Connection: Elasticsearch8HttpConnection, requestTimeout: 30_000, - ssl: { - rejectUnauthorized: false, - }, - tls: { - rejectUnauthorized: false, - }, }); }; @@ -60,12 +54,6 @@ const getEmbeddingClient = (config: TaskConfig) => { // generating embeddings takes time requestTimeout: 10 * 60 * 1000, Connection: HttpConnection, - ssl: { - rejectUnauthorized: false, - }, - tls: { - rejectUnauthorized: false, - }, }); }; @@ -96,7 +84,6 @@ export const buildArtifacts = async (config: TaskConfig) => { buildFolder: config.buildFolder, targetFolder: config.targetFolder, sourceClient, - sourceClusterIndex: config.sourceClusterIndex, embeddingClient, log, inferenceId: config.inferenceId ?? defaultInferenceEndpoints.ELSER, @@ -115,7 +102,6 @@ const buildArtifact = async ({ sourceClient, log, inferenceId, - sourceClusterIndex = 'connector-prod-s3-doc-content-v1', }: { productName: ProductName; stackVersion: string; @@ -125,7 +111,6 @@ const buildArtifact = async ({ embeddingClient: Client; log: ToolingLog; inferenceId: string; - sourceClusterIndex?: string; }) => { log.info( `Starting building artifact for product [${productName}] and version [${stackVersion}] with inference id [${inferenceId}]` @@ -154,7 +139,7 @@ const buildArtifact = async ({ let documents = await extractDocumentation({ client: sourceClient, - index: sourceClusterIndex ?? 'connector-prod-s3-doc-content-v1', + index: 'search-docs-1', log, productName, stackVersion, diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/command.ts b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/command.ts index 7829aa962e26b..7e4ebda200f25 100644 --- a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/command.ts +++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/command.ts @@ -53,12 +53,6 @@ function options(y: yargs.Argv) { demandOption: true, default: process.env.KIBANA_SOURCE_CLUSTER_PASSWORD, }) - .option('sourceClusterIndex', { - describe: 'The source cluster index', - string: true, - demandOption: true, - default: process.env.KIBANA_SOURCE_INDEX, - }) .option('embeddingClusterUrl', { describe: 'The embedding cluster url', string: true, @@ -96,7 +90,6 @@ export function runScript() { sourceClusterUrl: argv.sourceClusterUrl!, sourceClusterUsername: argv.sourceClusterUsername!, sourceClusterPassword: argv.sourceClusterPassword!, - sourceClusterIndex: argv.sourceClusterIndex!, embeddingClusterUrl: argv.embeddingClusterUrl!, embeddingClusterUsername: argv.embeddingClusterUsername!, embeddingClusterPassword: argv.embeddingClusterPassword!, diff --git a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/extract_documentation.ts b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/extract_documentation.ts index e44672caa6231..5c9cec4609805 100644 --- a/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/extract_documentation.ts +++ b/x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/extract_documentation.ts @@ -9,6 +9,7 @@ import type { Client as ElasticsearchClient8 } from 'elasticsearch-8.x'; import type { SearchHit } from '@elastic/elasticsearch/lib/api/types'; import type { ToolingLog } from '@kbn/tooling-log'; import type { ProductName } from '@kbn/product-doc-common'; +import { getSourceNamesFromProductName, getProductNameFromSource } from '../artifact/product_name'; /** the list of fields to import from the source cluster */ const fields = [ @@ -39,12 +40,12 @@ export interface ExtractedDocument { ai_tags: string[]; } -const convertHit = (hit: SearchHit, productName: ProductName): ExtractedDocument => { +const convertHit = (hit: SearchHit): ExtractedDocument => { const source = hit._source; return { content_title: source.content_title, content_body: source.content_body, - product_name: productName, + product_name: getProductNameFromSource(source.product_name), root_type: 'documentation', slug: source.slug, url: source.url, @@ -56,43 +57,11 @@ const convertHit = (hit: SearchHit, productName: ProductName): ExtractedDoc }; }; -const generateSearchCriteriaForProduct = (productName: ProductName) => { - if (productName.toLowerCase() === 'elasticsearch') { - return { - bool: { - minimum_should_match: 1, - should: [ - { - match_phrase: { - filename: '*solutions/search*', - }, - }, - { - wildcard: { - product_name: { - case_insensitive: true, - value: 'elasticsearch', - }, - }, - }, - ], - }, - }; - } - return { - wildcard: { - filename: { - value: `*${productName}*`, - case_insensitive: false, - }, - }, - }; -}; export const extractDocumentation = async ({ client, index, stackVersion, - productName: productNameParam, + productName, log, }: { client: ElasticsearchClient8; @@ -103,30 +72,20 @@ export const extractDocumentation = async ({ }) => { log.info(`Starting to extract documents from source cluster`); - const productName = productNameParam.toLowerCase(); - const query = { + const response = await client.search({ index, size: 10000, query: { bool: { must: [ - { - bool: { - should: [generateSearchCriteriaForProduct(productName)], - }, - }, - { - exists: { - field: 'ai_fields.ai_summary', - }, - }, + { terms: { product_name: getSourceNamesFromProductName(productName) } }, + { term: { version: stackVersion } }, + { exists: { field: 'ai_fields.ai_summary' } }, ], }, }, fields, - }; - - const response = await client.search(query); + }); const totalHits = typeof response.hits.total === 'number' @@ -141,5 +100,5 @@ export const extractDocumentation = async ({ `Finished extracting documents from source. ${response.hits.hits.length} documents were extracted` ); - return response.hits.hits.map((hit) => convertHit(hit, productName)); + return response.hits.hits.map(convertHit); };