elastic · patrykkopycinski · Apr 27, 2026 · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026
diff --git a/.buildkite/pipelines/evals/llm_evals.yml b/.buildkite/pipelines/evals/llm_evals.yml
@@ -242,6 +242,29 @@ steps:
             - exit_status: '-1'
               limit: 3
 
+      - label: 'Evals: PCI Compliance'
+        key: kbn-evals-weekly-pci-compliance
+        command: bash .buildkite/scripts/steps/evals/run_suite.sh
+        env:
+          KBN_EVALS: '1'
+          FTR_EIS_CCM: '1'
+          EVAL_SUITE_ID: 'pci-compliance' # same value as the suite "id" registered in evals.suites.json
+          EVAL_FANOUT: '1'
+          EVAL_INCLUDE_EIS_MODELS: '1'
+          EVAL_MODEL_GROUPS: *weekly_eis_core_models # YAML alias; its anchor is defined elsewhere in this pipeline file
+          EVAL_SERVER_CONFIG_SET: 'evals_pci_compliance' # same value as the suite "serverConfigSet" in evals.suites.json
+        timeout_in_minutes: 60
+        agents:
+          image: family/kibana-ubuntu-2404
+          imageProject: elastic-images-prod
+          provider: gcp
+          machineType: n2-standard-8
+          preemptible: true
+        retry:
+          automatic:
+            - exit_status: '-1' # Buildkite reports -1 when the agent is lost; presumably covers preempted VMs - confirm
+              limit: 3
+
       - label: 'Evals: Entity Analytics'
         key: kbn-evals-weekly-entity-analytics
         command: bash .buildkite/scripts/steps/evals/run_suite.sh

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -1284,6 +1284,7 @@ x-pack/solutions/security/packages/kbn-cloud-security-posture/public @elastic/co
 x-pack/solutions/security/packages/kbn-evals-suite-attack-discovery @elastic/security-generative-ai
 x-pack/solutions/security/packages/kbn-evals-suite-endpoint @elastic/security-defend-workflows
 x-pack/solutions/security/packages/kbn-evals-suite-entity-analytics @elastic/security-entity-analytics
+x-pack/solutions/security/packages/kbn-evals-suite-pci-compliance @elastic/security-defend-workflows
 x-pack/solutions/security/packages/kbn-evals-suite-security-ai-rules @elastic/security-detection-engine
 x-pack/solutions/security/packages/kbn-scout-security @elastic/appex-qa @elastic/security-engineering-productivity
 x-pack/solutions/security/packages/kbn-securitysolution-autocomplete @elastic/security-detection-engine

diff --git a/package.json b/package.json
@@ -1710,6 +1710,7 @@
     "@kbn/evals-suite-llm-tasks": "link:x-pack/platform/packages/shared/ai-infra/kbn-evals-suite-llm-tasks",
     "@kbn/evals-suite-obs-ai-assistant": "link:x-pack/solutions/observability/packages/kbn-evals-suite-obs-ai-assistant",
     "@kbn/evals-suite-observability-ai": "link:x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai",
+    "@kbn/evals-suite-pci-compliance": "link:x-pack/solutions/security/packages/kbn-evals-suite-pci-compliance",
     "@kbn/evals-suite-security-ai-rules": "link:x-pack/solutions/security/packages/kbn-evals-suite-security-ai-rules",
     "@kbn/evals-suite-significant-events": "link:x-pack/platform/packages/shared/kbn-evals-suite-significant-events",
     "@kbn/evals-suite-streams": "link:x-pack/platform/packages/shared/kbn-evals-suite-streams",

diff --git a/.../src/servers/configs/config_sets/evals_pci_compliance/stateful/classic.stateful.config.ts b/.../src/servers/configs/config_sets/evals_pci_compliance/stateful/classic.stateful.config.ts
@@ -0,0 +1,33 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+import type { ScoutServerConfig } from '../../../../../types';
+import { servers as evalsTracingConfig } from '../../evals_tracing/stateful/classic.stateful.config';
+
+/**
+ * Scout stateful server config for the PCI DSS v4.0.1 compliance evals: the
+ * evals_tracing config plus the Agent Builder experimental-features UI setting
+ * override and the Security Solution `pciComplianceAgentBuilder` experimental flag.
+ *
+ * Usage:
+ *   node scripts/scout start-server --arch stateful --domain classic --serverConfigSet evals_pci_compliance
+ */
+export const servers: ScoutServerConfig = {
+  ...evalsTracingConfig, // every setting other than kbnTestServer is inherited unchanged
+  kbnTestServer: {
+    ...evalsTracingConfig.kbnTestServer,
+    serverArgs: [
+      ...evalsTracingConfig.kbnTestServer.serverArgs, // inherited args first; the PCI-specific args are appended after them
+      '--uiSettings.overrides.agentBuilder:experimentalFeatures=true',
+      `--xpack.securitySolution.enableExperimental=${JSON.stringify([
+        'pciComplianceAgentBuilder',
+      ])}`, // the flag value is the JSON array text ["pciComplianceAgentBuilder"]
+    ],
+  },
+};
diff --git a/tsconfig.base.json b/tsconfig.base.json
@@ -1172,6 +1172,8 @@
       "@kbn/evals-suite-obs-ai-assistant/*": ["x-pack/solutions/observability/packages/kbn-evals-suite-obs-ai-assistant/*"],
       "@kbn/evals-suite-observability-ai": ["x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai"],
       "@kbn/evals-suite-observability-ai/*": ["x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/*"],
+      "@kbn/evals-suite-pci-compliance": ["x-pack/solutions/security/packages/kbn-evals-suite-pci-compliance"],
+      "@kbn/evals-suite-pci-compliance/*": ["x-pack/solutions/security/packages/kbn-evals-suite-pci-compliance/*"],
       "@kbn/evals-suite-security-ai-rules": ["x-pack/solutions/security/packages/kbn-evals-suite-security-ai-rules"],
       "@kbn/evals-suite-security-ai-rules/*": ["x-pack/solutions/security/packages/kbn-evals-suite-security-ai-rules/*"],
       "@kbn/evals-suite-significant-events": ["x-pack/platform/packages/shared/kbn-evals-suite-significant-events"],

diff --git a/x-pack/platform/packages/shared/agent-builder/agent-builder-server/allow_lists.ts b/x-pack/platform/packages/shared/agent-builder/agent-builder-server/allow_lists.ts
@@ -45,8 +45,7 @@ export const AGENT_BUILDER_BUILTIN_TOOLS = [
   `${internalNamespaces.security}.get_entity`,
   `${internalNamespaces.security}.search_entities`,
   `${internalNamespaces.security}.pci_scope_discovery`,
-  `${internalNamespaces.security}.pci_compliance_check`,
-  `${internalNamespaces.security}.pci_compliance_report`,
+  `${internalNamespaces.security}.pci_compliance`,
   `${internalNamespaces.security}.pci_field_mapper`,
 
   // Streams

diff --git a/x-pack/platform/packages/shared/kbn-evals/evals.suites.json b/x-pack/platform/packages/shared/kbn-evals/evals.suites.json
@@ -169,6 +169,15 @@
       "tags": ["platform", "workflows"],
       "ciLabels": ["evals:workflows"],
       "serverConfigSet": "evals_workflows"
+    },
+    {
+      "id": "pci-compliance",
+      "name": "PCI DSS v4.0.1 Compliance",
+      "slackChannel": "#security-defend-workflows-tests",
+      "configPath": "x-pack/solutions/security/packages/kbn-evals-suite-pci-compliance/playwright.config.ts",
+      "tags": ["security", "pci-compliance"],
+      "ciLabels": ["evals:pci-compliance"],
+      "serverConfigSet": "evals_pci_compliance"
     }
   ]
 }
diff --git a/x-pack/solutions/security/packages/kbn-evals-suite-pci-compliance/README.md b/x-pack/solutions/security/packages/kbn-evals-suite-pci-compliance/README.md
@@ -0,0 +1,101 @@
+# @kbn/evals-suite-pci-compliance
+
+End-to-end evaluation suite for the **PCI DSS v4.0.1 compliance** Agent Builder
+skill. It exercises the consolidated `pci_compliance` tool along with
+`pci_scope_discovery` and `pci_field_mapper` against a small, deterministic
+dataset and asserts on scoring, evidence, scope claims, and the mandatory QSA
+disclaimer.
+
+The suite is modeled on `@kbn/evals-suite-endpoint` so traces, spans, and
+evaluator fields are directly comparable across security eval suites.
+
+## Prerequisites
+
+- The feature flag `pciComplianceAgentBuilder` must be enabled on the Kibana
+  test server. This is handled automatically when the suite runs through the
+  `evals_pci_compliance` Scout `serverConfigSet`
+  (`src/platform/packages/shared/kbn-scout/src/servers/configs/config_sets/evals_pci_compliance`).
+- An AI connector must be available (see the `@kbn/evals` docs for the
+  standard connector setup).
+- The Agent Builder experimental features UI setting is also enabled by the
+  `evals_pci_compliance` config set.
+
+## Running
+
+From the Kibana repo root:
+
+```sh
+# Start the Kibana + ES test server with the PCI compliance config set
+node scripts/scout start-server \
+  --arch stateful \
+  --domain classic \
+  --serverConfigSet evals_pci_compliance
+
+# In another terminal, run the suite
+node scripts/evals start --suite pci-compliance
+```
+
+All evaluation specs live under [`evals/pci_compliance`](./evals/pci_compliance).
+
+### Seeding a dev cluster manually
+
+To import the eval data into a remote dev cluster (e.g. Elastic Cloud):
+
+```sh
+# Requires x-pack/.env with: Elasticsearch=<url>, username=<user>, password=<pass>
+./scripts/seed_dev_cluster.sh            # seed data
+./scripts/seed_dev_cluster.sh --cleanup  # delete data streams
+```
+
+## Scenarios
+
+| Spec                                    | Skill / Tool                        | What it asserts                                                                                    |
+| --------------------------------------- | ----------------------------------- | -------------------------------------------------------------------------------------------------- |
+| `full compliance report`                | `pci_compliance` (`mode: "report"`) | Full scorecard across all 12 requirements with correct RED/AMBER/GREEN status.                     |
+| `requirement 8.3.4 — brute force`       | `pci_compliance` (`mode: "check"`)  | Detects 7 failed logins for jdoe (exceeds threshold of 6), RED status.                             |
+| `requirement 4.1 — weak TLS`            | `pci_compliance` (`mode: "check"`)  | Flags TLS 1.0, TLS 1.1, and plain HTTP as violations.                                             |
+| `requirement 2.2.4 — default accounts`  | `pci_compliance` (`mode: "check"`)  | Flags admin and root successful logins as default-account violations.                              |
+| `scope discovery`                       | `pci_scope_discovery`               | Identifies 4 ECS indices and classifies them (identity, network, endpoint).                        |
+| `field mapping for custom data`         | `pci_field_mapper`                  | Suggests correct ECS targets for non-ECS fields (username → user.name, etc.).                      |
+| `scoped check (auth-only)`              | `pci_compliance`                    | Auth requirements produce real findings; network/vuln/malware requirements are NOT_ASSESSABLE.     |
+| `requirement 9 — no matching data`      | `pci_compliance` (`mode: "check"`)  | Returns AMBER/NOT_ASSESSABLE when no physical access events exist.                                 |
+
+## Data generators
+
+Deterministic seed data lives in
+[`src/data_generators/pci_data.ts`](./src/data_generators/pci_data.ts). It
+provisions five data streams:
+
+| Index                     | Contents                                             | Doc count |
+| ------------------------- | ---------------------------------------------------- | --------- |
+| `logs-pci-auth-eval`      | ECS auth events: 7 failed logins (jdoe), admin/root successes, IAM events | 13        |
+| `logs-pci-network-eval`   | TLS 1.3/1.2 (good), TLS 1.0/1.1 (weak), plain HTTP | 6         |
+| `logs-pci-vuln-eval`      | Critical/high CVEs, IDS alerts (exploit, port scan)  | 4         |
+| `logs-pci-endpoint-eval`  | Malware detection, suspicious process execution      | 2         |
+| `logs-pci-custom-eval`    | Non-ECS legacy fields for field-mapper tests         | 4         |
+
+The generators expose `seedPciEvalData()` and `cleanupPciEvalData()` so each
+spec owns its lifecycle without leaking indices between runs.
+
+## Evaluator
+
+The suite uses a PCI-specific criteria evaluator
+(`src/evaluate_dataset.ts#createPciCriteriaEvaluator`) that pins a baseline
+(`BASELINE_PCI_CRITERIA`) asserting:
+
+- The DSS version (`4.0.1`) is referenced.
+- The response states that it is not a QSA attestation (non-attestation disclaimer).
+- A structured `scopeClaim` payload is emitted alongside any finding.
+
+Scenario-specific criteria layer on top of the baseline.
+
+## Why a dedicated suite
+
+- **Determinism**: PCI findings depend heavily on the data shape. Seeding a
+  small known-good dataset is far more reliable than reusing generic logs.
+- **Scope-claim parity**: Every PCI tool response ships a scope claim with
+  DSS version, indices, time range, evaluated requirements, checked fields,
+  and a disclaimer. The suite asserts on this for every scenario.
+- **Feature flag isolation**: The `pciComplianceAgentBuilder` flag is off by
+  default in Kibana; the `evals_pci_compliance` config set enables it only for
+  this suite's test server, leaving other eval runs unaffected.