Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .buildkite/ftr_platform_stateful_configs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ disabled:

# Gen AI suites, running with their own pipeline
- x-pack/platform/test/functional_gen_ai/inference/config.ts
- x-pack/platform/test/onechat/smoke_tests/config.stateful.ts

defaultQueue: 'n2-4-spot'
enabled:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json
# Backstage resource describing the Buildkite pipeline that runs the
# Agent Builder smoke tests on a daily schedule against `main`.
apiVersion: backstage.io/v1alpha1
kind: Resource
metadata:
  name: bk-kibana-agent-builder-smoke-tests-daily
  description: Runs the Agent Builder smoke tests daily
  links:
    - title: Pipeline link
      url: 'https://buildkite.com/elastic/kibana-agent-builder-smoke-tests-daily'
spec:
  type: buildkite-pipeline
  owner: 'group:workchat-eng'
  system: buildkite
  implementation:
    apiVersion: buildkite.elastic.dev/v1
    kind: Pipeline
    metadata:
      name: kibana / agent-builder / smoke tests / daily
      description: Runs the Agent Builder smoke tests daily
    spec:
      # Environment passed to every build of this pipeline. Values are quoted
      # so they stay strings ('#...' would otherwise start a comment).
      env:
        SLACK_NOTIFICATIONS_CHANNEL: '#agent-builder-eng'
        ELASTIC_SLACK_NOTIFICATIONS_ENABLED: 'true'
      repository: elastic/kibana
      pipeline_file: .buildkite/pipelines/agent_builder/smoke_tests.yml
      default_branch: main
      branch_configuration: main
      allow_rebuilds: true
      cancel_intermediate_builds: true
      # Branch and pull-request builds are disabled and the trigger mode is
      # `none`; the only trigger declared in this file is the schedule below.
      provider_settings:
        trigger_mode: none
        build_branches: false
        build_pull_requests: false
        publish_commit_status: false
        prefix_pull_request_fork_branch_names: false
        skip_pull_request_builds_for_existing_commits: false
      # Everyone may read and trigger builds; the listed teams may also manage
      # the pipeline.
      teams:
        everyone:
          access_level: BUILD_AND_READ
        workchat-eng:
          access_level: MANAGE_BUILD_AND_READ
        appex-ai-infra:
          access_level: MANAGE_BUILD_AND_READ
        kibana-operations:
          access_level: MANAGE_BUILD_AND_READ
        appex-qa:
          access_level: MANAGE_BUILD_AND_READ
        kibana-tech-leads:
          access_level: MANAGE_BUILD_AND_READ
      schedules:
        Daily build:
          # Minute 0, hour 0, every day, evaluated in the New York time zone.
          cronline: '0 0 * * * America/New_York'
          message: Daily build
          branch: main
      tags:
        - kibana
1 change: 1 addition & 0 deletions .buildkite/pipeline-resource-definitions/locations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ metadata:
spec:
type: url
targets:
- https://github.com/elastic/kibana/blob/main/.buildkite/pipeline-resource-definitions/kibana-agent-builder-smoke-tests-daily.yml
- https://github.com/elastic/kibana/blob/main/.buildkite/pipeline-resource-definitions/kibana-api-docs.yml
- https://github.com/elastic/kibana/blob/main/.buildkite/pipeline-resource-definitions/kibana-apis-capacity-testing-daily.yml
- https://github.com/elastic/kibana/blob/main/.buildkite/pipeline-resource-definitions/kibana-artifacts-container-image.yml
Expand Down
46 changes: 46 additions & 0 deletions .buildkite/pipelines/agent_builder/smoke_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
env:
FTR_GEN_AI: '1'
steps:
- label: '👨‍🔧 Pre-Build'
command: .buildkite/scripts/lifecycle/pre_build.sh
agents:
image: family/kibana-ubuntu-2404
Comment on lines +1 to +7
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The dedicated pipeline which will be running every day

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you like to set up a scheduled pipeline for this?
If so, find inspiration in .buildkite/pipeline-resource-definitions/kibana-api-docs.yml for example. You'll need such a pipeline resource def file (and an entry in locations.yml) so that Terrazzo can generate its own pipeline.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you! Done in 8e21dd1

imageProject: elastic-images-prod
provider: gcp
machineType: n2-standard-2

# Barrier: the steps above must complete before the build step is scheduled.
- wait

# Builds the Kibana distribution. The `if` skips this step when a
# KIBANA_BUILD_ID is already set to a non-empty value for the build.
- label: '🧑‍🏭 Build Kibana Distribution'
  key: build
  command: .buildkite/scripts/steps/build_kibana.sh
  if: "build.env('KIBANA_BUILD_ID') == null || build.env('KIBANA_BUILD_ID') == ''"
  agents:
    provider: gcp
    imageProject: elastic-images-prod
    image: family/kibana-ubuntu-2404
    machineType: n2-standard-8

# Barrier: the smoke tests only start once the build step has finished
# (or has been skipped).
- wait

# Runs the single Agent Builder smoke-test FTR config.
- label: Agent Builder API Smoke Tests
  key: ftr-agent-builder-smoke-tests
  command: .buildkite/scripts/steps/test/ftr_configs.sh
  env:
    FTR_CONFIG: 'x-pack/platform/test/onechat/smoke_tests/config.stateful.ts'
    FTR_CONFIG_GROUP_KEY: 'ftr-agent-builder-smoke-tests'
    FTR_GEN_AI: '1'
  timeout_in_minutes: 50
  parallelism: 1
  agents:
    provider: gcp
    imageProject: elastic-images-prod
    image: family/kibana-ubuntu-2404
    machineType: n2-standard-4
    preemptible: true
  retry:
    automatic:
      # Exit status -1 is retried up to three times.
      # NOTE(review): Buildkite reports -1 when the agent is lost, which is
      # the expected failure mode of a preemptible VM - confirm.
      - exit_status: '-1'
        limit: 3
      # Any other failure is retried once.
      - exit_status: '*'
        limit: 1
30 changes: 30 additions & 0 deletions .buildkite/pipelines/pull_request/agent_builder_smoke_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
steps:
- group: Agent Builder Smoke Tests
key: agent-builder-smoke-tests
depends_on:
- build
- quick_checks
- checks
Comment on lines +1 to +7
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The conditional pipeline for the pull requests

- linting
- linting_with_types
- check_types
- check_oas_snapshot
steps:
- command: .buildkite/scripts/steps/test/ftr_configs.sh
env:
FTR_CONFIG: 'x-pack/platform/test/onechat/smoke_tests/config.stateful.ts'
FTR_CONFIG_GROUP_KEY: 'ftr-agent-builder-smoke-tests'
FTR_GEN_AI: '1'
label: Agent Builder API Smoke Tests
key: ftr-agent-builder-smoke-tests
timeout_in_minutes: 50
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In your experience testing this, what was the average minutes for a successful run?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Atm it takes less than 10 mins to run. Now TBH this is the default timeout we have on all our FTR test pipelines, I just re-used it 😄

parallelism: 1
agents:
machineType: n2-standard-4
preemptible: true
retry:
automatic:
- exit_status: '-1'
limit: 3
- exit_status: '*'
limit: 1
33 changes: 25 additions & 8 deletions .buildkite/scripts/pipelines/pull_request/pipeline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,20 +132,37 @@ const getPipeline = (filename: string, removeSteps = true) => {
pipeline.push(getPipeline('.buildkite/pipelines/pull_request/ux_plugin_e2e.yml'));
}

const aiInfraPaths = [
/^x-pack\/platform\/packages\/shared\/ai-infra/,
/^x-pack\/platform\/plugins\/shared\/ai_infra/,
/^x-pack\/platform\/plugins\/shared\/inference/,
];
const aiConnectorPaths = [
/^x-pack\/platform\/plugins\/shared\/stack_connectors\/server\/connector_types\/bedrock/,
/^x-pack\/platform\/plugins\/shared\/stack_connectors\/server\/connector_types\/gemini/,
/^x-pack\/platform\/plugins\/shared\/stack_connectors\/server\/connector_types\/openai/,
/^x-pack\/platform\/plugins\/shared\/stack_connectors\/server\/connector_types\/inference/,
];
const agentBuilderPaths = [
/^x-pack\/platform\/plugins\/shared\/onechat/,
/^x-pack\/platform\/packages\/shared\/onechat/,
];

if (
(await doAnyChangesMatch([
/^x-pack\/platform\/packages\/shared\/ai-infra/,
/^x-pack\/platform\/plugins\/shared\/ai_infra/,
/^x-pack\/platform\/plugins\/shared\/inference/,
/^x-pack\/platform\/plugins\/shared\/stack_connectors\/server\/connector_types\/bedrock/,
/^x-pack\/platform\/plugins\/shared\/stack_connectors\/server\/connector_types\/gemini/,
/^x-pack\/platform\/plugins\/shared\/stack_connectors\/server\/connector_types\/openai/,
])) ||
(await doAnyChangesMatch([...aiInfraPaths, ...aiConnectorPaths])) ||
GITHUB_PR_LABELS.includes('ci:all-gen-ai-suites')
) {
pipeline.push(getPipeline('.buildkite/pipelines/pull_request/ai_infra_gen_ai.yml'));
}

if (
(await doAnyChangesMatch([...aiInfraPaths, ...aiConnectorPaths, ...agentBuilderPaths])) ||
GITHUB_PR_LABELS.includes('agent-builder:run-smoke-tests') ||
GITHUB_PR_LABELS.includes('ci:all-gen-ai-suites')
) {
pipeline.push(getPipeline('.buildkite/pipelines/pull_request/agent_builder_smoke_tests.yml'));
}
Comment on lines +158 to +164
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Condition on how the smoke test pipeline is executed for PRs


if (
GITHUB_PR_LABELS.includes('ci:build-cloud-image') &&
!GITHUB_PR_LABELS.includes('ci:deploy-cloud') &&
Expand Down
31 changes: 31 additions & 0 deletions x-pack/platform/test/onechat/smoke_tests/config.stateful.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { getPreconfiguredConnectorConfig } from '@kbn/gen-ai-functional-testing';
import type { FtrConfigProviderContext } from '@kbn/test';
import { createStatefulTestConfig } from '../../api_integration_deployment_agnostic/default_configs/stateful.config.base';
import { oneChatApiServices } from '../services/api';

// eslint-disable-next-line import/no-default-export
export default async function (ftrContext: FtrConfigProviderContext) {
const preconfiguredConnectors = getPreconfiguredConnectorConfig();

return createStatefulTestConfig({
services: oneChatApiServices,
Comment on lines +14 to +18
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you use the services that you expose in the ftrContext argument instead of importing import { oneChatApiServices } from '../services/api';?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's misleading because I had to wrap createStatefulTestConfig with my own function to get the connector list, but you are supposed to pass the list of services explicitly (this is actually where you defined your list of services)

We're doing the same for our other test suites, e.g.

import { oneChatApiServices } from '../../onechat/services/api';
export default createStatefulTestConfig({
services: oneChatApiServices,

testFiles: [require.resolve('./tests')],
junit: {
reportName: 'Agent Builder - Smoke Tests - API Integration',
},
// @ts-expect-error
kbnTestServer: {
serverArgs: [
'--uiSettings.overrides.agentBuilder:enabled=true',
`--xpack.actions.preconfigured=${JSON.stringify(preconfiguredConnectors)}`,
],
},
})(ftrContext);
}
14 changes: 14 additions & 0 deletions x-pack/platform/test/onechat/smoke_tests/ftr_provider_context.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { GenericFtrProviderContext } from '@kbn/test';
import { oneChatApiServices as services } from '../services/api';
import { pageObjects } from '../../functional/page_objects';

// Local aliases keep the generic instantiation below short.
type Services = typeof services;
type PageObjects = typeof pageObjects;

/**
 * Provider context type for the smoke tests: the generic FTR context
 * specialised with the onechat API services and the functional page objects
 * imported above.
 */
export type FtrProviderContext = GenericFtrProviderContext<Services, PageObjects>;

// Re-exported alongside the context type above.
export { pageObjects, services };
79 changes: 79 additions & 0 deletions x-pack/platform/test/onechat/smoke_tests/tests/converse.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import expect from '@kbn/expect';
import type { AvailableConnectorWithId } from '@kbn/gen-ai-functional-testing';
import { isToolCallStep, platformCoreTools } from '@kbn/onechat-common';
import type {
ChatRequestBodyPayload,
ChatResponse,
} from '@kbn/onechat-plugin/common/http_api/chat';
import type { FtrProviderContext } from '../ftr_provider_context';

export const converseApiSuite = (
{ id: connectorId }: AvailableConnectorWithId,
{ getService }: FtrProviderContext
) => {
// `supertest` service from the FTR context; used for every request below.
const supertest = getService('supertest');

/**
 * POSTs `payload` to the Agent Builder converse endpoint and returns the
 * response body.
 *
 * @param payload    Request body sent to `/api/agent_builder/converse`.
 * @param statusCode HTTP status the request must return (defaults to 200);
 *                   any other status makes the supertest expectation fail.
 * @returns The response body, cast to `T` (a `ChatResponse` by default).
 */
const converse = async <T = ChatResponse>(
  payload: ChatRequestBodyPayload,
  statusCode = 200
): Promise<T> => {
  const { body } = await supertest
    .post('/api/agent_builder/converse')
    .set('kbn-xsrf', 'true')
    .send(payload)
    .expect(statusCode);

  return body as T;
};

// These checks only assert that a conversation round-trips and returns a
// non-empty message (plus one tool call in the tool test); they do not
// inspect the content of the model's answer.
describe('Converse API', () => {
  describe('sync', () => {
    it('returns an answer for a simple message', async () => {
      const { response: answer } = await converse({
        connector_id: connectorId,
        input: 'Hello',
      });

      expect(answer.message.length).to.be.greaterThan(0);
    });

    it('can execute a tool', async () => {
      const { response: answer, steps } = await converse({
        connector_id: connectorId,
        input: `Using the "platform_core_list_indices" tool, please list my indices. Only call the tool once.`,
      });

      expect(answer.message.length).to.be.greaterThan(0);

      // Exactly one tool-call step is expected, and it must be the
      // list-indices tool.
      const toolCallSteps = steps.filter(isToolCallStep);
      expect(toolCallSteps.length).to.eql(1);

      const [onlyToolCall] = toolCallSteps;
      expect(onlyToolCall.tool_id).to.eql(platformCoreTools.listIndices);
    });

    it('can continue a text conversation', async () => {
      const firstTurn = await converse({
        connector_id: connectorId,
        input: 'Please say "hello"',
      });

      expect(firstTurn.response.message.length).to.be.greaterThan(0);

      // The second round reuses the conversation id returned by the first.
      const secondTurn = await converse({
        connector_id: connectorId,
        conversation_id: firstTurn.conversation_id,
        input: 'Please say it again.',
      });

      expect(secondTurn.response.message.length).to.be.greaterThan(0);
    });
  });
});
Comment on lines +35 to +78
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have stubbed API integ tests to test individual features. The goal is just to make sure the framework "functions" properly with real LLMs

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, I don't know what the requirements are for smoke tests or how far we should go with them

};
23 changes: 23 additions & 0 deletions x-pack/platform/test/onechat/smoke_tests/tests/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { getAvailableConnectors } from '@kbn/gen-ai-functional-testing';
import type { FtrProviderContext } from '../ftr_provider_context';
import { converseApiSuite } from './converse';

// eslint-disable-next-line import/no-default-export
export default function (providerContext: FtrProviderContext) {
// The suite callback is intentionally synchronous: Mocha registers nested
// suites and tests while this callback runs and ignores (and warns about) a
// returned Promise, so it must not be `async`. Nothing here is awaited.
describe('Agent Builder - LLM Smoke tests', () => {
  const connectors = getAvailableConnectors();

  // Register one nested suite per available connector so that each connector
  // is reported separately in the test output.
  connectors.forEach((connector) => {
    describe(`Connector "${connector.id}"`, () => {
      converseApiSuite(connector, providerContext);
    });
  });
Comment on lines +17 to +21
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Run the smoke tests for each CI connector

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just so I understand, this will run for every connector that is created with this line in the config:

const preconfiguredConnectors = getPreconfiguredConnectorConfig();? And presumably, right now that's only 1 given the naming of this function is singular: getPreconfiguredConnectorConfig

Copy link
Copy Markdown
Contributor Author

@pgayvallet pgayvallet Nov 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a single config, but it contains multiple connectors (I'm just bad at naming or at plurals).

I didn't check the exact list, but from what I remember we are running against

  • claude 3.5 and 3.7
  • Gemini pro 2.5
  • GPT-4o
  • Maybe others

});
}