Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
e03b0e8
feat: create @kbn/llm-batch-processing package structure
patrykkopycinski Mar 21, 2026
d8339a0
feat(aesop): add remaining aesop implementation files
patrykkopycinski Mar 22, 2026
e2c8293
feat(evals): integrate autonomous skill discovery routes and agents
patrykkopycinski Mar 22, 2026
e4143b8
feat(llm-batch): restore hierarchical batch processing package
patrykkopycinski Mar 22, 2026
c1f2019
feat(evals): add skill review workflow UI with execution details
patrykkopycinski Mar 22, 2026
daa31b2
feat(evals): implement real-time workflow progress tracking
patrykkopycinski Mar 22, 2026
64e3ddc
feat(evals): add incremental exploration with intelligent change dete…
patrykkopycinski Mar 22, 2026
215a377
test(evals): add comprehensive validation and benchmarking test suites
patrykkopycinski Mar 22, 2026
e909f36
docs(evals): add production implementation planning and architecture …
patrykkopycinski Mar 22, 2026
8cd5c2f
feat(evals): implement active feedback learning loop for continuous i…
patrykkopycinski Mar 22, 2026
dd5844a
feat(evals): add performance monitoring dashboard with 8 operational …
patrykkopycinski Mar 22, 2026
61a213d
feat(evals): implement security hardening with 4-layer defense and ra…
patrykkopycinski Mar 22, 2026
57254ed
test(evals): expand test coverage to 90%+ with UI and integration tests
patrykkopycinski Mar 22, 2026
e43baff
test(evals): add Scout E2E tests and workflow error recovery system
patrykkopycinski Mar 22, 2026
f24da8a
feat(evals): add APM instrumentation and production alerting
patrykkopycinski Mar 22, 2026
0414592
docs(evals): add comprehensive production documentation and operation…
patrykkopycinski Mar 22, 2026
2ae9358
docs(evals): mark production implementation 100% complete
patrykkopycinski Mar 22, 2026
3308c21
feat(evals): add Docker and dev container setup for hypothesis valida…
patrykkopycinski Mar 22, 2026
2d866a7
feat(evals): add fully automated hypothesis validation with complete …
patrykkopycinski Mar 22, 2026
6ddcee9
feat(evals): AESOP spike - autonomous skill discovery, LLM validation…
patrykkopycinski Mar 23, 2026
d00306e
chore: remove generated spike documentation files
patrykkopycinski Mar 23, 2026
4b7c88e
fix(evals): address code quality issues in AESOP spike
patrykkopycinski Mar 23, 2026
ff25fdb
feat(evals): add skill improvement proposals, source indices, and der…
patrykkopycinski Mar 25, 2026
76fda94
fix(evals): address PR feedback and clean up non-AESOP files
patrykkopycinski Mar 25, 2026
ed20243
feat(evals): add convergence validation loop for AESOP skill auto-imp…
patrykkopycinski Mar 25, 2026
97e19f4
feat(evals): add persistent rate limiter backed by Elasticsearch
patrykkopycinski Mar 25, 2026
9cd3364
feat(evals): add deep Agent Builder conversation analysis for AESOP
patrykkopycinski Mar 25, 2026
e4b03f9
feat(evals): add skill deduplication for AESOP exploration workflow
patrykkopycinski Mar 25, 2026
2ff9821
fix(evals): production readiness fixes for AESOP spike
patrykkopycinski Mar 25, 2026
9e1537f
fix(evals): add accessibility aria-labels and error boundary to AESOP UI
patrykkopycinski Mar 25, 2026
dba5475
fix(evals): add ProposedSkillDocument type and replace `as any` casts…
patrykkopycinski Mar 25, 2026
04e50bf
test(evals): replace stub E2E tests with real route handler unit test…
patrykkopycinski Mar 25, 2026
51907d2
fix(evals): address macroscope review findings in AESOP spike
patrykkopycinski Mar 25, 2026
c07f1d3
fix(evals): address 6 macroscope review comments in AESOP spike
patrykkopycinski Mar 25, 2026
659ee42
feat(evals): define AESOP agent configurations for Agent Builder orch…
patrykkopycinski Mar 25, 2026
d2149b8
feat(evals): add agent lifecycle manager for AESOP Agent Builder agents
patrykkopycinski Mar 25, 2026
25a918e
feat(evals): add Agent Builder orchestration for AESOP with backwards…
patrykkopycinski Mar 25, 2026
717a860
fix(evals): fix duration_ms bug and add agent-based improvement route
patrykkopycinski Mar 25, 2026
95417ae
fix(evals): add agent progress tracking, conversation persistence, fi…
patrykkopycinski Mar 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 109 additions & 0 deletions docker-compose.aesop-spike.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
version: '3.8'

services:
# ═══════════════════════════════════════════════════════════════
# Elasticsearch - Primary data store
# ═══════════════════════════════════════════════════════════════
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:9.4.0-SNAPSHOT
container_name: aesop-elasticsearch
environment:
- discovery.type=single-node
- xpack.security.enabled=true
- ELASTIC_PASSWORD=changeme
- xpack.license.self_generated.type=trial
- xpack.ml.enabled=true
- cluster.name=aesop-cluster
ports:
- "9200:9200"
volumes:
- es-data:/usr/share/elasticsearch/data
networks:
- aesop-network
healthcheck:
test: ["CMD-SHELL", "curl -u elastic:changeme -s http://localhost:9200/_cluster/health | grep -q '\"status\":\"green\\|yellow\"'"]
interval: 10s
timeout: 5s
retries: 30

# ═══════════════════════════════════════════════════════════════
# EDOT Collector - O11y trace collection
# ═══════════════════════════════════════════════════════════════
edot-collector:
image: otel/opentelemetry-collector-contrib:latest
container_name: aesop-edot-collector
command: ["--config=/etc/otel/config.yaml"]
ports:
- "4318:4318" # OTLP HTTP receiver
- "4317:4317" # OTLP gRPC receiver
volumes:
- ./docker/edot-config.yaml:/etc/otel/config.yaml:ro
networks:
- aesop-network
depends_on:
elasticsearch:
condition: service_healthy

# ═══════════════════════════════════════════════════════════════
# Kibana - UI and API server
# ═══════════════════════════════════════════════════════════════
kibana:
image: docker.elastic.co/kibana/kibana:9.4.0-SNAPSHOT
container_name: aesop-kibana
environment:
- ELASTICSEARCH_HOSTS=http://elasticsearch:9200
- ELASTICSEARCH_USERNAME=elastic
- ELASTICSEARCH_PASSWORD=changeme
- xpack.evals.enabled=true
- OTEL_EXPORTER_OTLP_ENDPOINT=http://edot-collector:4318
- OTEL_TRACES_EXPORTER=otlp
- OTEL_METRICS_EXPORTER=none
- OTEL_LOGS_EXPORTER=none
ports:
- "5601:5601"
networks:
- aesop-network
depends_on:
elasticsearch:
condition: service_healthy
edot-collector:
condition: service_started
healthcheck:
test: ["CMD-SHELL", "curl -s http://localhost:5601/api/status | grep -q '\"level\":\"available\"'"]
interval: 15s
timeout: 10s
retries: 30

# ═══════════════════════════════════════════════════════════════
# Data Generator - Loads demo data for hypothesis testing
# ═══════════════════════════════════════════════════════════════
data-generator:
image: node:18-alpine
container_name: aesop-data-generator
working_dir: /app
volumes:
- ./x-pack/solutions/security/plugins/security_solution/scripts/aesop_demo:/app
- ./scripts:/scripts
command: sh -c "
echo 'Waiting for Elasticsearch...' &&
sleep 30 &&
echo 'Generating demo data for hypothesis validation...' &&
node data_generator.ts --mode hypothesis-validation
"
networks:
- aesop-network
depends_on:
elasticsearch:
condition: service_healthy
environment:
- ES_URL=http://elasticsearch:9200
- ES_USERNAME=elastic
- ES_PASSWORD=changeme

networks:
aesop-network:
driver: bridge

volumes:
es-data:
driver: local
51 changes: 51 additions & 0 deletions docker/edot-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# EDOT Collector Configuration for AESOP Spike
# Collects OTEL traces from Kibana Agent Builder agents and exports to Elasticsearch

receivers:
otlp:
protocols:
http:
endpoint: 0.0.0.0:4318
grpc:
endpoint: 0.0.0.0:4317

processors:
batch:
timeout: 10s
send_batch_size: 1024

# Add AESOP-specific attributes
attributes:
actions:
- key: deployment.environment
value: aesop-spike-local
action: insert

exporters:
elasticsearch:
endpoints: ["http://elasticsearch:9200"]
auth:
authenticator: basicauth
traces_index: traces-apm.otel-default
mapping:
mode: ecs

# Debug exporter for troubleshooting
debug:
verbosity: detailed
sampling_initial: 5
sampling_thereafter: 200

extensions:
basicauth:
client_auth:
username: elastic
password: changeme

service:
extensions: [basicauth]
pipelines:
traces:
receivers: [otlp]
processors: [batch, attributes]
exporters: [elasticsearch, debug]
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { batchProcess } from '../src/orchestrator';

// Unit tests for the batchProcess entry point. They cover item-based batching with
// merge of the per-batch outputs, the concurrency cap, progress reporting, and the
// rejection of an unrecognised split strategy.
describe('batchProcess', () => {
it('should process batches concurrently and merge results', async () => {
// Input is the integers 0..99; each batch is reduced to its sum and pairs of sums are added.
const items = Array.from({ length: 100 }, (_, i) => i);
const processFn = jest.fn(async (batch: number[]) => batch.reduce((a, b) => a + b, 0));
const mergeFn = jest.fn(async ([a, b]: [number, number]) => a + b);

const result = await batchProcess({
input: items,
splitStrategy: 'item-based',
maxItemsPerBatch: 25,
processFn,
mergeFn,
maxConcurrentBatches: 3,
});

// 100 items / 25 per batch = 4 batches
expect(processFn).toHaveBeenCalledTimes(4);

// Hierarchical merge: 4 outputs -> 2 -> 1 = 3 merge calls
expect(mergeFn).toHaveBeenCalledTimes(3);

// Sum of 0..99 = 4950
expect(result.output).toBe(4950);
expect(result.stats.batches).toBe(4);
// Two rounds: 4 -> 2, then 2 -> 1.
expect(result.stats.mergeRounds).toBe(2);
});

it('should respect maxConcurrentBatches', async () => {
const items = Array.from({ length: 10 }, (_, i) => i);
// `concurrent` counts processFn invocations currently in flight;
// `maxConcurrent` records the highest value it ever reached.
let concurrent = 0;
let maxConcurrent = 0;

const processFn = async (batch: number[]) => {
concurrent++;
maxConcurrent = Math.max(maxConcurrent, concurrent);
// Stay pending for 10ms so that overlapping invocations are observable.
await new Promise((resolve) => setTimeout(resolve, 10));
concurrent--;
return batch.reduce((a, b) => a + b, 0);
};

// One item per batch yields 10 batches, well above the cap of 2.
await batchProcess({
input: items,
splitStrategy: 'item-based',
maxItemsPerBatch: 1,
processFn,
mergeFn: async ([a, b]) => a + b,
maxConcurrentBatches: 2,
});

expect(maxConcurrent).toBeLessThanOrEqual(2);
});

it('should call onProgress callback', async () => {
const items = [1, 2, 3, 4];
// Every (completed, total) pair reported by batchProcess, in the order received.
const progressUpdates: Array<{ completed: number; total: number }> = [];

// NOTE(review): maxConcurrentBatches is not passed here, so the default applies.
// The ordered expectation below presumably relies on one progress report per
// finished batch with an increasing count — confirm against the orchestrator.
await batchProcess({
input: items,
splitStrategy: 'item-based',
maxItemsPerBatch: 1,
processFn: async (batch) => batch[0],
mergeFn: async ([a, b]) => a + b,
onProgress: (completed, total) => {
progressUpdates.push({ completed, total });
},
});

// Four single-item batches -> four updates, each with total 4.
expect(progressUpdates).toEqual([
{ completed: 1, total: 4 },
{ completed: 2, total: 4 },
{ completed: 3, total: 4 },
{ completed: 4, total: 4 },
]);
});

it('should throw on invalid split strategy', async () => {
await expect(
batchProcess({
input: [1, 2, 3],
// The `as any` cast bypasses compile-time checking of splitStrategy so that
// the runtime validation is what rejects the value.
splitStrategy: 'invalid' as any,
processFn: async (batch) => batch[0],
mergeFn: async ([a, b]) => a + b,
})
).rejects.toThrow('Unknown split strategy');
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

// Jest configuration for the @kbn/llm-batch-processing package.
module.exports = {
// Shared Kibana preset for tests that run in a Node environment.
preset: '@kbn/test/jest_node',
// Climbs five directory levels, matching the five path segments of the package
// location listed in `roots` (presumably the repository root — verify if the package moves).
rootDir: '../../../../..',
// Restrict test discovery to this package's directory.
roots: ['<rootDir>/x-pack/platform/packages/shared/kbn-llm-batch-processing'],
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"type": "shared-common",
"id": "@kbn/llm-batch-processing",
"owner": "@elastic/security-generative-ai",
"group": "platform",
"visibility": "shared"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"name": "@kbn/llm-batch-processing",
"private": true,
"version": "1.0.0",
"license": "Elastic License 2.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

// Main entry point
export { batchProcess } from './orchestrator';

// Low-level utilities
export { tokenBasedSplit, itemBasedSplit } from './split';
export { hierarchicalMerge } from './merge';

// Types
export type { BatchConfig, BatchResult, BatchStats, SplitStrategy, MergeStrategy } from './types';
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

/**
 * Reduce a list of batch outputs to a single value by merging neighbours in rounds.
 *
 * In every round, adjacent elements are paired up (index 0 with 1, 2 with 3, ...)
 * and each pair is replaced by the result of `mergeFn`. When a round has an odd
 * number of elements, the last one has no partner and is carried into the next
 * round unchanged. Rounds repeat until one element is left.
 *
 * Pairs are merged one after another: each `mergeFn` call is awaited before the
 * next one starts. The `outputs` array itself is never modified.
 *
 * @param outputs - Batch outputs to combine; must contain at least one element
 * @param mergeFn - Async function that combines a `[left, right]` pair into one output
 * @returns The single remaining output; for a one-element input this is that
 *   element and `mergeFn` is never called
 * @throws Error with message `Cannot merge empty array` when `outputs` is empty
 *
 * @example
 * ```typescript
 * const summaries = ['batch1', 'batch2', 'batch3', 'batch4'];
 * const combined = await hierarchicalMerge(summaries, async ([left, right]) => {
 *   return await llm.summarize([left, right]);
 * });
 * ```
 */
export async function hierarchicalMerge<T>(
  outputs: T[],
  mergeFn: (pair: [T, T]) => Promise<T>
): Promise<T> {
  if (outputs.length === 0) {
    throw new Error('Cannot merge empty array');
  }

  let survivors = outputs;

  // A single survivor means the tournament is over (this also covers one-element input).
  while (survivors.length > 1) {
    const promoted: T[] = [];
    let left = 0;

    while (left < survivors.length) {
      const right = left + 1;

      if (right >= survivors.length) {
        // Unpaired trailing element: advance it to the next round as-is.
        promoted.push(survivors[left]);
        break;
      }

      promoted.push(await mergeFn([survivors[left], survivors[right]]));
      left += 2;
    }

    survivors = promoted;
  }

  return survivors[0];
}
Loading
Loading