Skip to content

Commit 1429560

Browse files
[8.x] Tweak product doc generation for 8.17 (#205189) (#205920)
# Backport

This will backport the following commits from `main` to `8.x`:

- [Tweak product doc generation for 8.17 (#205189)](https://github.com/elastic/kibana/pull/205189)

<!--- Backport version: 9.4.3 -->

### Questions?

Please refer to the [Backport tool documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Pierre Gayvallet","email":"[email protected]"},"sourceCommit":{"committedDate":"2025-01-08T15:19:45Z","message":"Tweak product doc generation for 8.17 (#205189)\n\n## Summary\r\n\r\n- use default elser\r\n- adapt cleaning for new markdown format","sha":"81a5aa97f1caf715b43b369922676609c6fb56e1","branchLabelMapping":{"^v9.0.0$":"main","^v8.18.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","v9.0.0","backport:prev-minor","Team:AI Infra"],"title":"Tweak product doc generation for 8.17","number":205189,"url":"https://github.com/elastic/kibana/pull/205189","mergeCommit":{"message":"Tweak product doc generation for 8.17 (#205189)\n\n## Summary\r\n\r\n- use default elser\r\n- adapt cleaning for new markdown format","sha":"81a5aa97f1caf715b43b369922676609c6fb56e1"}},"sourceBranch":"main","suggestedTargetBranches":[],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/205189","number":205189,"mergeCommit":{"message":"Tweak product doc generation for 8.17 (#205189)\n\n## Summary\r\n\r\n- use default elser\r\n- adapt cleaning for new markdown format","sha":"81a5aa97f1caf715b43b369922676609c6fb56e1"}}]}] BACKPORT-->

Co-authored-by: Pierre Gayvallet <[email protected]>
1 parent f9d428e commit 1429560

File tree

5 files changed

+14
-82
lines changed

5 files changed

+14
-82
lines changed

x-pack/packages/ai-infra/product-doc-artifact-builder/src/build_artifacts.ts

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ import {
1414
createTargetIndex,
1515
extractDocumentation,
1616
indexDocuments,
17-
installElser,
1817
createChunkFiles,
1918
createArtifact,
2019
cleanupFolders,
@@ -68,9 +67,6 @@ export const buildArtifacts = async (config: TaskConfig) => {
6867

6968
await cleanupFolders({ folders: [config.buildFolder] });
7069

71-
log.info('Ensuring ELSER is installed on the embedding cluster');
72-
await installElser({ client: embeddingClient });
73-
7470
for (const productName of config.productNames) {
7571
await buildArtifact({
7672
productName,

x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/create_index.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,15 @@
88
import type { Client } from '@elastic/elasticsearch';
99
import type { MappingTypeMapping } from '@elastic/elasticsearch/lib/api/types';
1010

11+
const DEFAULT_ELSER = '.elser-2-elasticsearch';
12+
1113
const mappings: MappingTypeMapping = {
1214
dynamic: 'strict',
1315
properties: {
1416
content_title: { type: 'text' },
1517
content_body: {
1618
type: 'semantic_text',
17-
inference_id: 'kibana-elser2',
19+
inference_id: DEFAULT_ELSER,
1820
},
1921
product_name: { type: 'keyword' },
2022
root_type: { type: 'keyword' },
@@ -24,11 +26,11 @@ const mappings: MappingTypeMapping = {
2426
ai_subtitle: { type: 'text' },
2527
ai_summary: {
2628
type: 'semantic_text',
27-
inference_id: 'kibana-elser2',
29+
inference_id: DEFAULT_ELSER,
2830
},
2931
ai_questions_answered: {
3032
type: 'semantic_text',
31-
inference_id: 'kibana-elser2',
33+
inference_id: DEFAULT_ELSER,
3234
},
3335
ai_tags: { type: 'keyword' },
3436
},

x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/index.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
export { extractDocumentation } from './extract_documentation';
99
export { indexDocuments } from './index_documents';
1010
export { createTargetIndex } from './create_index';
11-
export { installElser } from './install_elser';
1211
export { createChunkFiles } from './create_chunk_files';
1312
export { checkConnectivity } from './check_connectivity';
1413
export { createArtifact } from './create_artifact';

x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/install_elser.ts

Lines changed: 0 additions & 73 deletions
This file was deleted.

x-pack/packages/ai-infra/product-doc-artifact-builder/src/tasks/process_documents.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,15 @@ const removeDuplicates = (documents: ExtractedDocument[]): ExtractedDocument[] =
3333
return uniqBy(documents, (doc) => doc.slug);
3434
};
3535

36+
const EMPTY_DOC_TOKEN_LIMIT = 120;
37+
3638
/**
3739
* Filters out "this content has moved" or "deleted page" style documents, based solely on token count.
3840
*/
3941
const filterEmptyDocs = (documents: ExtractedDocument[]): ExtractedDocument[] => {
4042
return documents.filter((doc) => {
4143
const tokenCount = encode(doc.content_body).length;
42-
if (tokenCount < 100) {
44+
if (tokenCount < EMPTY_DOC_TOKEN_LIMIT) {
4345
return false;
4446
}
4547
return true;
@@ -52,8 +54,14 @@ const processDocument = (document: ExtractedDocument) => {
5254
.replaceAll(/([a-zA-Z])edit\n/g, (match) => {
5355
return `${match[0]}\n`;
5456
})
57+
// remove edit links
58+
.replaceAll(/\[\s*edit\s*\]\(\s*[^)]+\s*\)/g, '')
59+
// remove empty links
60+
.replaceAll('[]()', '')
5561
// collapse runs of blank lines: allow at most 2 consecutive newlines
5662
.replaceAll(/\n\n+/g, '\n\n');
5763

64+
document.content_title = document.content_title.split('|')[0].trim();
65+
5866
return document;
5967
};

0 commit comments

Comments
 (0)