Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Defaults to `{REPO_ROOT}/build/temp-kb-artifacts`
`sourceClusterUrl` / env.KIBANA_SOURCE_CLUSTER_URL
`sourceClusterUsername` / env.KIBANA_SOURCE_CLUSTER_USERNAME
`sourceClusterPassword` / env.KIBANA_SOURCE_CLUSTER_PASSWORD
`sourceClusterIndex` / env.KIBANA_SOURCE_INDEX

- params for the embedding cluster:
`embeddingClusterUrl` / env.KIBANA_EMBEDDING_CLUSTER_URL
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ const getSourceClient = (config: TaskConfig) => {
},
Connection: Elasticsearch8HttpConnection,
requestTimeout: 30_000,
ssl: {
rejectUnauthorized: false,
},
tls: {
rejectUnauthorized: false,
},
});
};

Expand All @@ -54,6 +60,12 @@ const getEmbeddingClient = (config: TaskConfig) => {
// generating embeddings takes time
requestTimeout: 10 * 60 * 1000,
Connection: HttpConnection,
ssl: {
rejectUnauthorized: false,
},
tls: {
rejectUnauthorized: false,
},
});
};

Expand Down Expand Up @@ -84,6 +96,7 @@ export const buildArtifacts = async (config: TaskConfig) => {
buildFolder: config.buildFolder,
targetFolder: config.targetFolder,
sourceClient,
sourceClusterIndex: config.sourceClusterIndex,
embeddingClient,
log,
inferenceId: config.inferenceId ?? defaultInferenceEndpoints.ELSER,
Expand All @@ -102,6 +115,7 @@ const buildArtifact = async ({
sourceClient,
log,
inferenceId,
sourceClusterIndex = 'connector-prod-s3-doc-content-v1',
}: {
productName: ProductName;
stackVersion: string;
Expand All @@ -111,6 +125,7 @@ const buildArtifact = async ({
embeddingClient: Client;
log: ToolingLog;
inferenceId: string;
sourceClusterIndex?: string;
}) => {
log.info(
`Starting building artifact for product [${productName}] and version [${stackVersion}] with inference id [${inferenceId}]`
Expand Down Expand Up @@ -139,7 +154,7 @@ const buildArtifact = async ({

let documents = await extractDocumentation({
client: sourceClient,
index: 'search-docs-1',
index: sourceClusterIndex ?? 'connector-prod-s3-doc-content-v1',
log,
productName,
stackVersion,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ function options(y: yargs.Argv) {
demandOption: true,
default: process.env.KIBANA_SOURCE_CLUSTER_PASSWORD,
})
.option('sourceClusterIndex', {
describe: 'The source cluster index',
string: true,
demandOption: true,
default: process.env.KIBANA_SOURCE_INDEX,
})
.option('embeddingClusterUrl', {
describe: 'The embedding cluster url',
string: true,
Expand Down Expand Up @@ -90,6 +96,7 @@ export function runScript() {
sourceClusterUrl: argv.sourceClusterUrl!,
sourceClusterUsername: argv.sourceClusterUsername!,
sourceClusterPassword: argv.sourceClusterPassword!,
sourceClusterIndex: argv.sourceClusterIndex!,
embeddingClusterUrl: argv.embeddingClusterUrl!,
embeddingClusterUsername: argv.embeddingClusterUsername!,
embeddingClusterPassword: argv.embeddingClusterPassword!,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import type { Client as ElasticsearchClient8 } from 'elasticsearch-8.x';
import type { SearchHit } from '@elastic/elasticsearch/lib/api/types';
import type { ToolingLog } from '@kbn/tooling-log';
import type { ProductName } from '@kbn/product-doc-common';
import { getSourceNamesFromProductName, getProductNameFromSource } from '../artifact/product_name';

/** the list of fields to import from the source cluster */
const fields = [
Expand Down Expand Up @@ -40,12 +39,12 @@ export interface ExtractedDocument {
ai_tags: string[];
}

const convertHit = (hit: SearchHit<any>): ExtractedDocument => {
const convertHit = (hit: SearchHit<any>, productName: ProductName): ExtractedDocument => {
const source = hit._source;
return {
content_title: source.content_title,
content_body: source.content_body,
product_name: getProductNameFromSource(source.product_name),
product_name: productName,
root_type: 'documentation',
slug: source.slug,
url: source.url,
Expand All @@ -57,11 +56,43 @@ const convertHit = (hit: SearchHit<any>): ExtractedDocument => {
};
};

/**
 * Build the Elasticsearch query clause used to select source documents
 * for a given product.
 *
 * For `elasticsearch` the docs live both under the `solutions/search` path
 * and under an explicit `product_name`, so either condition may match
 * (`minimum_should_match: 1`). For every other product, documents are
 * selected by a wildcard match on the filename.
 *
 * @param productName - the product whose documentation is being extracted
 * @returns a query DSL fragment suitable for embedding in a `bool.must` array
 */
const generateSearchCriteriaForProduct = (productName: ProductName) => {
  if (productName.toLowerCase() === 'elasticsearch') {
    return {
      bool: {
        minimum_should_match: 1,
        should: [
          {
            // NOTE: `match_phrase` does not interpret `*` as a wildcard (the
            // analyzer strips it), so the original clause could never match
            // paths under solutions/search. A `wildcard` query is required.
            wildcard: {
              filename: {
                value: '*solutions/search*',
                case_insensitive: true,
              },
            },
          },
          {
            wildcard: {
              product_name: {
                case_insensitive: true,
                value: 'elasticsearch',
              },
            },
          },
        ],
      },
    };
  }
  return {
    wildcard: {
      filename: {
        // NOTE(review): case_sensitive match here vs case_insensitive above —
        // presumably intentional since callers lowercase productName; confirm.
        value: `*${productName}*`,
        case_insensitive: false,
      },
    },
  };
};
export const extractDocumentation = async ({
client,
index,
stackVersion,
productName,
productName: productNameParam,
log,
}: {
client: ElasticsearchClient8;
Expand All @@ -72,20 +103,30 @@ export const extractDocumentation = async ({
}) => {
log.info(`Starting to extract documents from source cluster`);

const response = await client.search({
const productName = productNameParam.toLowerCase();
const query = {
index,
size: 10000,
query: {
bool: {
must: [
{ terms: { product_name: getSourceNamesFromProductName(productName) } },
{ term: { version: stackVersion } },
{ exists: { field: 'ai_fields.ai_summary' } },
{
bool: {
should: [generateSearchCriteriaForProduct(productName)],
},
},
{
exists: {
field: 'ai_fields.ai_summary',
},
},
],
},
},
fields,
});
};

const response = await client.search(query);

const totalHits =
typeof response.hits.total === 'number'
Expand All @@ -100,5 +141,5 @@ export const extractDocumentation = async ({
`Finished extracting documents from source. ${response.hits.hits.length} documents were extracted`
);

return response.hits.hits.map(convertHit);
return response.hits.hits.map((hit) => convertHit(hit, productName));
};