Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
158 commits
Select commit Hold shift + click to select a range
a79e858
feat(streams): wip enrichment redesign
tonyghiani Jan 23, 2025
e6650ee
feat(streams): wip redesign
tonyghiani Jan 23, 2025
6a58780
refactor(streams): update copies
tonyghiani Jan 24, 2025
cb10b5d
Merge branch 'main' into 93-update-ui-processing
tonyghiani Jan 24, 2025
eeb67a7
refactor(streams): allow text ellipsis
tonyghiani Jan 24, 2025
e18dd93
refactor(streams): reset forms on cancel
tonyghiani Jan 24, 2025
4ec0f7d
refactor(streams): update internal forms structure and typing
tonyghiani Jan 24, 2025
470a22f
refactor(streams): update internal state management to track processo…
tonyghiani Jan 27, 2025
8962b15
refactor(streams): update discard changes modal
tonyghiani Jan 27, 2025
bd81230
refactor(streams): update dissect processor typing
tonyghiani Jan 27, 2025
c260449
refactor(streams): minor changes
tonyghiani Jan 27, 2025
7c1c4f0
refactor(streams): update sampling condition
tonyghiani Jan 28, 2025
78a09a4
Merge branch '93-update-ui-processing' of github.com:tonyghiani/kiban…
tonyghiani Jan 28, 2025
731a125
Merge branch 'tonyghiani-93-update-ui-processing' into 93-update-ui-p…
tonyghiani Jan 28, 2025
c19f57d
Merge branch 'main' into 93-update-ui-processing
tonyghiani Jan 28, 2025
d7fbb25
refactor(streams): improvements to simulation
tonyghiani Jan 28, 2025
224c7df
refactor(streams): update columns rendering for unmatched docs
tonyghiani Jan 28, 2025
39d6a62
refactor(streams): wip simulation table
tonyghiani Jan 28, 2025
10c513f
refactor(streams): wip simulation table style
tonyghiani Jan 28, 2025
12e38a1
Merge branch 'main' into 93-update-ui-processing
tonyghiani Jan 29, 2025
0040628
feat(streams): wip data preview
tonyghiani Jan 29, 2025
23b80da
start suggestions page
flash1293 Jan 29, 2025
96549e2
refactor(streams): minor cleanup
tonyghiani Jan 29, 2025
20d849a
refactor(streams): minor changes
tonyghiani Jan 29, 2025
0df5843
refactor(streams): update live processors udpates
tonyghiani Jan 30, 2025
e48537f
refactor(streams): remove import
tonyghiani Jan 30, 2025
340d238
refactor(streams): remove unused props
tonyghiani Jan 30, 2025
81e38b9
fix(streams): disable simulation on existing processors
tonyghiani Jan 30, 2025
411fac8
refactor(streams): minor changes
tonyghiani Jan 30, 2025
ae4b54c
Merge branch 'main' into 93-update-ui-processing
tonyghiani Jan 30, 2025
9e15664
Merge branch 'main' into 93-update-ui-processing
tonyghiani Jan 30, 2025
5fe314f
basic parsing suggestions
flash1293 Jan 30, 2025
915e846
Merge branch 'main' into 93-update-ui-processing
tonyghiani Jan 31, 2025
39d9f9c
refactor(streams): update usage and remove legacy types
tonyghiani Jan 31, 2025
7ab0375
Merge branch 'main' into 93-update-ui-processing
tonyghiani Jan 31, 2025
c4708bb
refactor(streams): address offline feedback
tonyghiani Jan 31, 2025
a90fe1a
Merge branch '93-update-ui-processing' of github.com:tonyghiani/kiban…
tonyghiani Jan 31, 2025
315f32f
Merge branch 'main' into 93-update-ui-processing
tonyghiani Feb 3, 2025
5c259be
refactor(streams): address change requests
tonyghiani Feb 3, 2025
be1d1ee
refactor(streams): reworded prompt message
tonyghiani Feb 3, 2025
3b9d227
Merge remote-tracking branch 'upstream/main' into flash1293/llm-parsi…
flash1293 Feb 4, 2025
3d5b7a9
better processing
flash1293 Feb 4, 2025
5238c9a
ai suggestions for parsing
flash1293 Feb 4, 2025
a4cf9d6
make it somewhat work
flash1293 Feb 5, 2025
94b6e5e
make it work a little more
flash1293 Feb 5, 2025
9dba6c1
make it work a little more even
flash1293 Feb 5, 2025
2aa1331
Merge remote-tracking branch 'upstream/main' into flash1293/llm-parsi…
flash1293 Feb 5, 2025
620865c
[CI] Auto-commit changed files from 'node scripts/styled_components_m…
kibanamachine Feb 5, 2025
b4596c2
fix
flash1293 Feb 5, 2025
3e3c50e
Merge branch 'flash1293/llm-parsing-suggestions' of github.com:flash1…
flash1293 Feb 5, 2025
46f47df
fix the prompt
flash1293 Feb 5, 2025
423d871
fix the prompt again
flash1293 Feb 5, 2025
c405695
[CI] Auto-commit changed files from 'node scripts/eslint --no-cache -…
kibanamachine Feb 5, 2025
b3dcb17
Merge remote-tracking branch 'upstream/main' into flash1293/llm-parsi…
flash1293 Feb 5, 2025
eeda79c
[CI] Auto-commit changed files from 'node scripts/styled_components_m…
kibanamachine Feb 5, 2025
eb60cc4
improve prompt
flash1293 Feb 5, 2025
bba2eb9
Merge branch 'flash1293/llm-parsing-suggestions' of github.com:flash1…
flash1293 Feb 5, 2025
c70cff0
fix
flash1293 Feb 5, 2025
d7fc7cf
feat(streams): improve stream docs typing
tonyghiani Feb 6, 2025
b19ca23
fix hardcoded connector
flash1293 Feb 6, 2025
742a49c
fix(streams): fix images lazy load chunks
tonyghiani Feb 6, 2025
0a78c65
refactor(streams): wip split simulation utils
tonyghiani Feb 6, 2025
796dae1
Merge branch 'main' into 127-enrichment-simulation-improvements
tonyghiani Feb 6, 2025
c0c4de5
check for connector
flash1293 Feb 6, 2025
759d5db
Merge remote-tracking branch 'upstream/main' into flash1293/llm-parsi…
flash1293 Feb 6, 2025
aec2466
fix
flash1293 Feb 6, 2025
e7a06fc
Merge remote-tracking branch 'upstream/main' into flash1293/llm-parsi…
flash1293 Feb 6, 2025
7d25576
revert workaround
flash1293 Feb 6, 2025
061aade
Merge remote-tracking branch 'upstream/main' into flash1293/llm-parsi…
flash1293 Feb 7, 2025
08b3e5a
refactor(streams): minor change
tonyghiani Feb 7, 2025
2d34c6a
refactor a bit
flash1293 Feb 7, 2025
fa4b760
fixes
flash1293 Feb 7, 2025
38316fa
[CI] Auto-commit changed files from 'node scripts/styled_components_m…
kibanamachine Feb 7, 2025
2c1a8a8
remove retries and fix bugs
flash1293 Feb 7, 2025
36509c1
Merge branch 'flash1293/llm-parsing-suggestions' of github.com:flash1…
flash1293 Feb 7, 2025
a81de65
Merge remote-tracking branch 'upstream/main' into flash1293/llm-parsi…
flash1293 Feb 7, 2025
cc81fb8
fix more types
flash1293 Feb 7, 2025
47dd7ed
only send the shown samples
flash1293 Feb 7, 2025
0695680
[CI] Auto-commit changed files from 'node scripts/styled_components_m…
kibanamachine Feb 7, 2025
9a45f2d
wip(streams): simulation API
tonyghiani Feb 7, 2025
4a27b4e
Merge branch 'main' into flash1293/llm-parsing-suggestions
flash1293 Feb 10, 2025
72fc09c
wip(streams): simulation API updates
tonyghiani Feb 10, 2025
6da33ca
all the review comments
flash1293 Feb 10, 2025
8365696
Merge remote-tracking branch 'upstream/main' into flash1293/llm-parsi…
flash1293 Feb 10, 2025
b3572fc
more cleanups
flash1293 Feb 10, 2025
f7230e1
Merge branch 'flash1293/llm-parsing-suggestions' of github.com:flash1…
flash1293 Feb 10, 2025
29fb946
refactor(streams): update client usage of state
tonyghiani Feb 10, 2025
a69dc16
refactor(streams): use processors id for simulation
tonyghiani Feb 10, 2025
65cd1de
refactor(streams): update usage of processor id
tonyghiani Feb 10, 2025
80563af
recursiverecord -> sampledocument
flash1293 Feb 10, 2025
289031e
Merge remote-tracking branch 'upstream/main' into flash1293/llm-parsi…
flash1293 Feb 10, 2025
62f33cb
fix types
flash1293 Feb 10, 2025
cb1e5f9
refactor(streams): remove unused functions
tonyghiani Feb 11, 2025
e2f42e5
Merge branch 'main' into 127-enrichment-simulation-improvements
tonyghiani Feb 11, 2025
edbdaee
feat(kbn-object-utils): update flattenObject override priority
tonyghiani Feb 11, 2025
fb49143
wip(streams): detected field simulation
tonyghiani Feb 11, 2025
3b3e858
Merge remote-tracking branch 'upstream/main' into flash1293/llm-parsi…
flash1293 Feb 11, 2025
e1b19d5
some fixes
flash1293 Feb 11, 2025
996573e
some fixes
flash1293 Feb 11, 2025
6659dfc
refactor
flash1293 Feb 11, 2025
c87762d
fix(streams): performance issue documents parsing
tonyghiani Feb 11, 2025
a574288
wip(streams): add error reporting on streams app
tonyghiani Feb 11, 2025
9ccfdde
feat(streams): add error reporting on streams app
tonyghiani Feb 11, 2025
0b45a52
feat(kbn-object-utils): update naming
tonyghiani Feb 11, 2025
6bf5f1b
feat(streams): remove import
tonyghiani Feb 11, 2025
304381e
refactor(streams): move files
tonyghiani Feb 11, 2025
0b58e5c
feat(streams): apply gracefully handled processor errors
tonyghiani Feb 12, 2025
378dc28
feat(streams): improve error reporting simulation API
tonyghiani Feb 12, 2025
fbd277e
feat(streams): improve simulation API docs comments
tonyghiani Feb 12, 2025
335b326
feat(streams): update ui on disabled submit
tonyghiani Feb 12, 2025
bcdf398
test(streams): update processing simulation tests
tonyghiani Feb 12, 2025
6ca1455
chore(streams): fix typing issues
tonyghiani Feb 12, 2025
1b2fff2
test(streams): add more processing simulation tests
tonyghiani Feb 13, 2025
8e17d42
Merge branch 'main' into 127-enrichment-simulation-improvements
tonyghiani Feb 13, 2025
02057be
Merge branch 'main' into 127-enrichment-simulation-improvements
tonyghiani Feb 13, 2025
fafcd73
refactor(streams): update type
tonyghiani Feb 13, 2025
d5f049e
Merge branch '127-enrichment-simulation-improvements' of github.com:t…
tonyghiani Feb 14, 2025
1ba1eb0
refactor(streams): update errors + badges styles and copies
tonyghiani Feb 14, 2025
f8207d2
refactor(streams): update label pluralization
tonyghiani Feb 14, 2025
0ba9c03
refactor(streams): revert file deletion
tonyghiani Feb 14, 2025
67eafaf
refactor(streams): minor ui changes
tonyghiani Feb 14, 2025
c75e9de
Update src/platform/packages/shared/kbn-object-utils/src/flatten_obje…
tonyghiani Feb 14, 2025
f02e411
Update src/platform/packages/shared/kbn-object-utils/src/flatten_obje…
tonyghiani Feb 14, 2025
72b207f
refactor(streams): minor ui changes
tonyghiani Feb 14, 2025
e539e65
Merge branch '127-enrichment-simulation-improvements' of github.com:t…
tonyghiani Feb 14, 2025
2dce11d
[CI] Auto-commit changed files from 'node scripts/eslint --no-cache -…
kibanamachine Feb 14, 2025
cc3b66e
refactor(streams): fix test
tonyghiani Feb 17, 2025
b461282
Merge branch 'main' into 127-enrichment-simulation-improvements
tonyghiani Feb 17, 2025
c74aa6e
refactor(streams): wip state management
tonyghiani Feb 18, 2025
2c11408
refactor(streams): address review tips
tonyghiani Feb 18, 2025
707cc2c
refactor(streams): fix exports order
tonyghiani Feb 18, 2025
a58291f
refactor(streams): wip state management
tonyghiani Feb 18, 2025
3de284d
refactor(streams): remove leftover z.lazy call
tonyghiani Feb 18, 2025
7cac053
refactor(streams): wip state management
tonyghiani Feb 18, 2025
525f55a
Merge branch '127-enrichment-simulation-improvements' of github.com:t…
tonyghiani Feb 18, 2025
5ec5561
Merge branch 'tonyghiani-127-enrichment-simulation-improvements' into…
tonyghiani Feb 18, 2025
8143adc
refactor(streams): wip state management
tonyghiani Feb 18, 2025
2d440f8
refactor(kbn-object-utils): split flatten object for nested priority …
tonyghiani Feb 18, 2025
5943707
Merge remote-tracking branch 'upstream/main' into flash1293/llm-parsi…
flash1293 Feb 18, 2025
e85d43b
Merge remote-tracking branch 'tonyghiani/127-enrichment-simulation-im…
flash1293 Feb 18, 2025
cb2f2cd
remove merge conflict
flash1293 Feb 18, 2025
1740676
[CI] Auto-commit changed files from 'ts-node .buildkite/pipeline-reso…
kibanamachine Feb 18, 2025
5fe3967
review comments
flash1293 Feb 18, 2025
6f1ad40
Merge branch 'flash1293/llm-parsing-suggestions' of github.com:flash1…
flash1293 Feb 18, 2025
739ee42
refactor(streams): wip new stream enrichment hook
tonyghiani Feb 18, 2025
288dc3f
reset errors
flash1293 Feb 18, 2025
0ef51d2
refactor(streams): wip new stream enrichment hook
tonyghiani Feb 19, 2025
35edb90
Merge branch 'main' into 102-refactor-state-management
tonyghiani Feb 19, 2025
8ff8453
refactor(streams): update usage to state machine
tonyghiani Feb 19, 2025
705e53a
Merge branch 'main' into 102-refactor-state-management
tonyghiani Feb 19, 2025
c1f3895
Merge remote-tracking branch 'tonyghiani/102-refactor-state-managemen…
flash1293 Feb 19, 2025
0cade54
Merge remote-tracking branch 'upstream/main' into flash1293/llm-parsi…
flash1293 Feb 19, 2025
ef7edbc
Merge remote-tracking branch 'upstream/main' into flash1293/llm-parsi…
flash1293 Feb 19, 2025
44cba3c
[CI] Auto-commit changed files from 'node scripts/styled_components_m…
kibanamachine Feb 19, 2025
c9d307a
revert draft changes
flash1293 Feb 19, 2025
3a96ea4
Merge branch 'flash1293/llm-parsing-suggestions' of github.com:flash1…
flash1293 Feb 19, 2025
b89dad2
fine tuning
flash1293 Feb 19, 2025
6b3ce93
[CI] Auto-commit changed files from 'node scripts/styled_components_m…
kibanamachine Feb 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import {
FilterCondition,
isAlwaysCondition,
Condition,
isFilterCondition,
isAndCondition,
Expand Down Expand Up @@ -62,6 +63,9 @@ export function conditionToQueryDsl(condition: Condition): any {
},
};
}
if (isAlwaysCondition(condition)) {
return { match_all: {} };
}
return {
match_none: {},
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,7 @@ export type FlattenRecord = Record<PropertyKey, Primitive | Primitive[]>;
// Zod schema for `FlattenRecord`: a record whose values are each either a single
// `primitive` or an array of `primitive` values (no nested objects).
export const flattenRecord: z.ZodType<FlattenRecord> = z.record(
z.union([primitive, z.array(primitive)])
);

// Same value as `recursiveRecord`, exported under the name used for sampled documents.
export const sampleDocument = recursiveRecord;

// Type-level counterpart of `sampleDocument`; an alias of `RecursiveRecord`.
export type SampleDocument = RecursiveRecord;
3 changes: 2 additions & 1 deletion x-pack/solutions/observability/plugins/streams/kibana.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
"usageCollection",
"licensing",
"taskManager",
"alerting"
"alerting",
"inference",
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not really related to this PR, but that looks like a bunch of dependencies that we can remove? taskManager, encryptedSavedObject, maybe even alerting although I guess we have some references to that in the AssetService

],
"optionalPlugins": [
"cloud",
Expand Down
12 changes: 4 additions & 8 deletions x-pack/solutions/observability/plugins/streams/server/plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,22 +78,18 @@ export class StreamsPlugin
}: {
request: KibanaRequest;
}): Promise<RouteHandlerScopedClients> => {
const [coreStart, assetClient] = await Promise.all([
core.getStartServices().then(([_coreStart]) => _coreStart),
const [[coreStart, pluginsStart], assetClient] = await Promise.all([
core.getStartServices(),
assetService.getClientWithRequest({ request }),
]);

const streamsClient = await streamsService.getClientWithRequest({ request, assetClient });

const scopedClusterClient = coreStart.elasticsearch.client.asScoped(request);
const soClient = coreStart.savedObjects.getScopedClient(request);
const inferenceClient = pluginsStart.inference.getClient({ request });

return {
scopedClusterClient,
soClient,
assetClient,
streamsClient,
};
return { scopedClusterClient, soClient, assetClient, streamsClient, inferenceClient };
},
},
core,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
*/

import {
RecursiveRecord,
SampleDocument,
conditionSchema,
conditionToQueryDsl,
getFields,
Expand Down Expand Up @@ -165,7 +165,7 @@ export const sampleStreamRoute = createServerRoute({
...searchBody,
});

return { documents: results.hits.hits.map((hit) => hit._source) as RecursiveRecord[] };
return { documents: results.hits.hits.map((hit) => hit._source) as SampleDocument[] };
},
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
*/

import {
FlattenRecord,
flattenRecord,
namedFieldDefinitionConfigSchema,
processorWithIdDefinitionSchema,
Expand All @@ -15,6 +16,7 @@ import { checkAccess } from '../../../lib/streams/stream_crud';
import { createServerRoute } from '../../create_server_route';
import { DefinitionNotFoundError } from '../../../lib/streams/errors/definition_not_found_error';
import { ProcessingSimulationParams, simulateProcessing } from './simulation_handler';
import { handleProcessingSuggestion } from './suggestions_handler';

const paramsSchema = z.object({
path: z.object({ name: z.string() }),
Expand Down Expand Up @@ -50,6 +52,51 @@ export const simulateProcessorRoute = createServerRoute({
},
});

/** Request body accepted by the processing suggestions endpoint. */
export interface ProcessingSuggestionBody {
/** Name of the document field whose values are analyzed and used as the Grok source field. */
field: string;
/** Id of the connector that is forwarded to the inference client. */
connectorId: string;
/** Flattened sample documents used for grouping and for simulating suggested patterns. */
samples: FlattenRecord[];
}

// Runtime (zod) validation for the request body; mirrors the fields of `ProcessingSuggestionBody`.
const processingSuggestionSchema = z.object({
field: z.string(),
connectorId: z.string(),
samples: z.array(flattenRecord),
});

// Full route params: the stream name from the URL path plus the validated request body.
const suggestionsParamsSchema = z.object({
path: z.object({ name: z.string() }),
body: processingSuggestionSchema,
});

/**
 * Internal route that produces parsing suggestions for a stream.
 *
 * The handler resolves the request-scoped clients and hands the stream name and
 * the validated body over to `handleProcessingSuggestion`, returning its result
 * unchanged.
 */
export const processingSuggestionRoute = createServerRoute({
  endpoint: 'POST /api/streams/{name}/processing/_suggestions',
  options: {
    access: 'internal',
  },
  security: {
    authz: {
      enabled: false,
      reason:
        'This API delegates security to the currently logged in user and their Elasticsearch permissions.',
    },
  },
  params: suggestionsParamsSchema,
  // Only the values the handler actually uses are destructured.
  handler: async ({ params, request, getScopedClients }) => {
    const { inferenceClient, scopedClusterClient, streamsClient } = await getScopedClients({
      request,
    });

    return handleProcessingSuggestion(
      params.path.name,
      params.body,
      inferenceClient,
      scopedClusterClient,
      streamsClient
    );
  },
});

// Route map for the processing endpoints: the simulation route merged with the suggestions route.
export const processingRoutes = {
...simulateProcessorRoute,
...processingSuggestionRoute,
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { IScopedClusterClient } from '@kbn/core/server';
import { get, groupBy, mapValues, orderBy, shuffle, uniq, uniqBy } from 'lodash';
import { InferenceClient } from '@kbn/inference-plugin/server';
import { FlattenRecord } from '@kbn/streams-schema';
import { StreamsClient } from '../../../lib/streams/client';
import { simulateProcessing } from './simulation_handler';
import { ProcessingSuggestionBody } from './route';

/**
 * Produces Grok pattern suggestions for the values of `body.field` in `body.samples`.
 *
 * The samples are grouped by a coarse, truncated "shape" of the field value, the
 * most frequent groups are selected, and `processPattern` is run once per selected
 * group. The simulations returned for all groups are then de-duplicated by pattern.
 *
 * @param name Stream name, forwarded to the processing simulation.
 * @param body Request body carrying the field name, connector id and samples.
 * @param inferenceClient Inference client, forwarded to `processPattern`.
 * @param scopedClusterClient Scoped cluster client, forwarded to `processPattern`.
 * @param streamsClient Streams client, forwarded to `processPattern`.
 * @returns The de-duplicated pattern strings and their simulations.
 */
export const handleProcessingSuggestion = async (
name: string,
body: ProcessingSuggestionBody,
inferenceClient: InferenceClient,
scopedClusterClient: IScopedClusterClient,
streamsClient: StreamsClient
) => {
const { field, samples } = body;
// Reduce a sample message to a coarse "shape" string so that similarly formatted
// messages end up in the same group. The rewrite steps run strictly in order:
//   1. runs of spaces/tabs/newlines -> a single space
//   2. runs of letters              -> 'a'
//   3. runs of digits               -> '0'
//   4. repeated 'a a'               -> 'a'
//   5. repeated 'a0'                -> 'f'
//   6. repeated 'f:'                -> 'f:'
//   7. '0' followed by one or more (any character + '0') -> 'p'
// Examples:
//   '192.168.1.1 GET /index.html 200'     -> 'p a /a.a 0'
//   '2025-01-01T12:00:00Z host1 started'  -> 'pf:pf a'
const evalPattern = (sample: string) => {
  const rewriteSteps: Array<[RegExp, string]> = [
    [/[ \t\n]+/g, ' '],
    [/[A-Za-z]+/g, 'a'],
    [/[0-9]+/g, '0'],
    [/(a a)+/g, 'a'],
    [/(a0)+/g, 'f'],
    [/(f:)+/g, 'f:'],
    [/0(.0)+/g, 'p'],
  ];

  return rewriteSteps.reduce(
    (shape, [matcher, replacement]) => shape.replace(matcher, replacement),
    sample
  );
};
Comment on lines +25 to +34
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe some comments/examples here?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I meant to add a unit test for this whole handler as it's doing a bunch of things and forgot - will add


// Upper bound on how many pattern groups (the most frequent ones) are processed.
const NUMBER_PATTERN_CATEGORIES = 5;
// Upper bound on how many example values are kept per pattern group.
const NUMBER_SAMPLES_PER_PATTERN = 8;

// Pair every usable sample with the shape derived from its field value.
// Samples where the field is missing or is not a string are skipped: the shape
// computation relies on string methods and would otherwise throw, failing the
// whole request because of a single document.
const samplesWithPatterns = samples.flatMap((sample) => {
  const fieldValue = get(sample, field);
  if (typeof fieldValue !== 'string') {
    return [];
  }

  const pattern = evalPattern(fieldValue);
  return [
    {
      document: sample,
      fullPattern: pattern,
      // Only the first 10 characters of the shape are used as the grouping key.
      truncatedPattern: pattern.slice(0, 10),
      fieldValue,
    },
  ];
});

// Bucket the samples by the truncated shape of their field value.
const samplesByTruncatedPattern = groupBy(
  samplesWithPatterns,
  (entry) => entry.truncatedPattern
);

// Summarize each bucket: how many samples it holds, plus a random selection of
// its distinct field values to serve as examples.
const patternSummaries = Object.entries(samplesByTruncatedPattern).map(
  ([truncatedPattern, bucket]) => {
    const distinctValues = uniq(bucket.map((entry) => entry.fieldValue));

    return {
      truncatedPattern,
      count: bucket.length,
      exampleValues: shuffle(distinctValues).slice(0, NUMBER_SAMPLES_PER_PATTERN),
    };
  }
);

// Keep only the most frequent buckets.
const patternsToProcess = orderBy(patternSummaries, ['count'], ['desc']).slice(
  0,
  NUMBER_PATTERN_CATEGORIES
);

// Run `processPattern` for every selected pattern group concurrently.
const results = await Promise.all(
patternsToProcess.map((sample) =>
processPattern(
sample,
name,
body,
inferenceClient,
scopedClusterClient,
streamsClient,
field,
samples
)
)
);

// Merge the simulations of all groups and keep a single simulation per pattern string.
// `processPattern` already filters out null entries, hence the non-null assertions.
const deduplicatedSimulations = uniqBy(
results.flatMap((result) => result.simulations),
(simulation) => simulation!.pattern
);

return {
patterns: deduplicatedSimulations.map((simulation) => simulation!.pattern),
simulations: deduplicatedSimulations as SimulationWithPattern[],
};
};

// A simulation result combined with the Grok pattern that produced it.
// NOTE(review): `simulateProcessing` is awaited where it is called, so `ReturnType<...>`
// here is presumably a Promise type; `Awaited<ReturnType<...>>` may be intended — confirm.
type SimulationWithPattern = ReturnType<typeof simulateProcessing> & { pattern: string };

/**
 * Asks the inference connector for Grok parsing rules for one pattern group and
 * validates every suggested rule by simulating it against the provided samples.
 *
 * @param sample Summary of one pattern group; its `exampleValues` are sent to the model.
 * @param name Stream name, used as the path parameter of the simulation.
 * @param body Request body; only `connectorId` is read here.
 * @param inferenceClient Client used to request the structured model output.
 * @param scopedClusterClient Forwarded to `simulateProcessing`.
 * @param streamsClient Forwarded to `simulateProcessing`.
 * @param field Source field of the simulated Grok processor.
 * @param samples Documents every suggested pattern is simulated against.
 * @returns The raw chat response and the simulations that were kept. A simulation is
 *   dropped when it is flagged as non-additive or when its success rate is zero.
 */
async function processPattern(
  sample: { truncatedPattern: string; count: number; exampleValues: string[] },
  name: string,
  body: ProcessingSuggestionBody,
  inferenceClient: InferenceClient,
  scopedClusterClient: IScopedClusterClient,
  streamsClient: StreamsClient,
  field: string,
  samples: FlattenRecord[]
) {
  const chatResponse = await inferenceClient.output({
    id: 'get_pattern_suggestions',
    connectorId: body.connectorId,
    // necessary due to a bug in the inference client - TODO remove when fixed
    functionCalling: 'native',
    // NOTE(review): the prompt below spells the Grok syntax with doubled braces
    // ("%{{pattern:name:type}}") — confirm this is intentional.
    system: `Instructions:
- You are an assistant for observability tasks with a strong knowledge of logs and log parsing.
- Use JSON format.
- For a single log source identified, provide the following information:
* Use 'source_name' as the key for the log source name.
* Use 'parsing_rule' as the key for the parsing rule.
- Use only Grok patterns for the parsing rule.
* Use %{{pattern:name:type}} syntax for Grok patterns when possible.
* Combine date and time into a single @timestamp field when it's possible.
- Use ECS (Elastic Common Schema) fields whenever possible.
- You are correct, factual, precise, and reliable.
`,
    schema: {
      type: 'object',
      required: ['rules'],
      properties: {
        rules: {
          type: 'array',
          items: {
            type: 'object',
            required: ['parsing_rule'],
            properties: {
              source_name: {
                type: 'string',
              },
              parsing_rule: {
                type: 'string',
              },
            },
          },
        },
      },
    } as const,
    input: `Logs:
${sample.exampleValues.join('\n')}
Given the raw messages coming from one data source, help us do the following:
1. Name the log source based on logs format.
2. Write a parsing rule for Elastic ingest pipeline to extract structured fields from the raw message.
Make sure that the parsing rule is unique per log source. When in doubt, suggest multiple patterns, one generic one matching the general case and more specific ones.
`,
  });

  // The model may omit `rules` entirely. Fall back to an empty list so the rest of
  // the pipeline yields "no suggestions" instead of throwing on `undefined.map`.
  const suggestedRules = chatResponse.output.rules ?? [];
  const patterns = (
    suggestedRules.map((rule) => rule.parsing_rule).filter(Boolean) as string[]
  ).map(sanitizePattern);

  const simulations = (
    await Promise.all(
      patterns.map(async (pattern) => {
        // Validate match on current sample
        const simulationResult = await simulateProcessing({
          params: {
            path: { name },
            body: {
              processing: [
                {
                  id: 'grok-processor',
                  grok: {
                    field,
                    if: { always: {} },
                    patterns: [pattern],
                  },
                },
              ],
              documents: samples,
            },
          },
          scopedClusterClient,
          streamsClient,
        });

        // Discard suggestions that are non-additive or that match none of the samples.
        // TODO if success rate is zero, try to strip out the date part and try again
        if (simulationResult.is_non_additive_simulation || simulationResult.success_rate === 0) {
          return null;
        }

        return {
          ...simulationResult,
          pattern,
        };
      })
    )
  ).filter(Boolean) as Array<SimulationWithPattern | null>;

  return {
    chatResponse,
    simulations,
  };
}

/**
 * We need to keep parsing additive, but overwriting `message` or `@timestamp` is very common
 * in suggested patterns. As a workaround, until a proper solution for these cases is found,
 * captures targeting those fields are renamed to `message_derived` / `@timestamp_derived`.
 *
 * Both the untyped (`%{SYNTAX:message}`) and the typed (`%{SYNTAX:message:type}`) capture
 * forms are rewritten; an existing type suffix is preserved.
 *
 * @param pattern Grok pattern to sanitize.
 * @returns The pattern with `message` and `@timestamp` captures renamed.
 */
function sanitizePattern(pattern: string): string {
  // The optional second group matches a trailing `:type`; when it is absent, `$2`
  // expands to an empty string in the replacement.
  return pattern
    .replace(/%\{([^}]+):message(:[^}]+)?\}/g, '%{$1:message_derived$2}')
    .replace(/%\{([^}]+):@timestamp(:[^}]+)?\}/g, '%{$1:@timestamp_derived$2}');
}
Loading