diff --git a/.changeset/quiet-views-report.md b/.changeset/quiet-views-report.md new file mode 100644 index 0000000000..270e4dcbc7 --- /dev/null +++ b/.changeset/quiet-views-report.md @@ -0,0 +1,5 @@ +--- +"@lynx-js/kitten-lynx-test-infra": patch +--- + +Add `KittenLynxView.url()` so Android test consumers can read the currently navigated Lynx bundle URL. diff --git a/.github/a2ui-catalog.instructions.md b/.github/a2ui-catalog.instructions.md index abe1edf9e0..38460159ce 100644 --- a/.github/a2ui-catalog.instructions.md +++ b/.github/a2ui-catalog.instructions.md @@ -30,6 +30,8 @@ For catalog navigation, keep `components` and `catalog` as route aliases that re When a GenUI package builds a CLI or other generated artifact that another workspace package executes during its own build, declare that package's `dist/**` (or equivalent generated directory) as Turbo `build.outputs`. Without explicit outputs, cache hits can skip restoring the built CLI and leave downstream workspace bins pointing at missing files. +When `packages/genui/a2ui` generates its catalog, ensure `packages/genui/a2ui-catalog-extractor` has been built first. The `genui a2ui generate catalog` command delegates through `@lynx-js/genui-cli`, which imports `../a2ui-catalog-extractor/dist/cli.js`; clean CI runs will fail if that dist CLI is not produced before A2UI's build or API extractor script. + When implementing A2UI v0.9 functions in `packages/genui/a2ui`, keep function resolution scoped to the active catalog first, with the global `FunctionRegistry` only as an escape hatch. Dynamic component props, checks, and function-call actions should all go through the same `resolveDynamicValue` / `executeFunctionCall` path so data bindings, nested function calls, zod argument coercion from `@a2ui/web_core`, and `formatString` data-context interpolation stay consistent. 
When verifying `packages/genui/a2ui-playground`, remember that `pnpm -F @lynx-js/genui-a2ui build` first runs `tsc --project tsconfig.build.json` and then regenerates catalog JSON through `build:catalog`. The playground consumes `@lynx-js/genui/a2ui` through package exports under `dist/**`, so you normally do not need a separate `tsc` step unless you intentionally skipped the package `build` step. diff --git a/.github/ui-judge.instructions.md b/.github/ui-judge.instructions.md index 74376d404c..b8ed44e3f6 100644 --- a/.github/ui-judge.instructions.md +++ b/.github/ui-judge.instructions.md @@ -2,7 +2,11 @@ applyTo: "packages/genui/ui-judge/**/*" --- -When extending `@lynx-js/ui-judge`, keep `judgePage` as the only public runtime API until a caller needs more surface area. Callers own Playwright page setup, navigation, viewport, cookies, route mocks, and authentication. Additional dimensions should remain internal unless they are intentionally added to the package exports. +When extending `@lynx-js/ui-judge`, keep the public runtime API small and platform-specific. Playwright callers use `judgePage` and own page setup, navigation, viewport, cookies, route mocks, and authentication. Additional dimensions should remain internal unless they are intentionally added to the package exports. + +When adding Android support to `@lynx-js/ui-judge`, keep the public call shape close to `judgePage`: accept the `KittenLynxView` returned by `@lynx-js/kitten-lynx-test-infra`'s `newPage()` as `page`. Callers should own the Kitten-Lynx connection, navigation, and teardown lifecycle, while UI Judge creates the internal Midscene agent adapter. For Android scoring, pass `screenshotIncluded: true` without web-only DOM requirements, and return `page.url()` through the existing result `url` field. + +Keep Android-specific `@lynx-js/ui-judge` tests on Vitest rather than Playwright. 
Use a dedicated `test:android` script and let Playwright tests stay under `test:playwright`, so the Android emulator CI job can run UI Judge's Kitten-Lynx coverage without pulling in browser fixtures. Midscene scoring in this package should use `aiNumber()` and return a JSON-serializable integer score from 0 to 5. Prompt text must cooperate with Midscene's `aiNumber()` parser by asking for the requested `Number` field, not a bare JSON number. Do not reintroduce letter grades or `GRADE:` output in prompts. diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e5d35023b4..ed82c47237 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -100,7 +100,7 @@ jobs: exit 0 fi ulimit -Sn 655350 - pnpm --filter @lynx-js/ui-judge test + pnpm --filter @lynx-js/ui-judge run test:playwright ui-judge-comment: needs: ui-judge @@ -356,6 +356,7 @@ jobs: # 8. Run the tests pnpm --filter @lynx-js/kitten-lynx-test-infra run test --coverage --reporter=github-actions --reporter=dot --reporter=junit --outputFile=test-report.junit.xml --coverage.reporter='json' --coverage.reporter='text' --testTimeout=50000 --no-cache --logHeapUsage --silent + UI_JUDGE_ANDROID_INTEGRATION=1 pnpm --filter @lynx-js/ui-judge run test:android --coverage --reporter=github-actions --reporter=dot --reporter=junit --outputFile=ui-judge-android-test-report.junit.xml --coverage.reporter='json' --coverage.reporter='text' --testTimeout=50000 --no-cache --logHeapUsage --silent test-typos: runs-on: lynx-ubuntu-24.04-medium diff --git a/examples/react-externals/package.json b/examples/react-externals/package.json index ee9f641d26..65afa6a17d 100644 --- a/examples/react-externals/package.json +++ b/examples/react-externals/package.json @@ -4,10 +4,9 @@ "private": true, "type": "module", "scripts": { - "build": "pnpm run build:comp-lib && pnpm run build:reactlynx && rspeedy build", + "build": "pnpm run build:comp-lib && rspeedy build", "build:comp-lib": "rslib build --config 
rslib-comp-lib.config.ts", "build:comp-lib:dev": "cross-env NODE_ENV=development rslib build --config rslib-comp-lib.config.ts", - "build:reactlynx": "pnpm --filter @lynx-js/react-umd build", "dev": "rspeedy dev" }, "dependencies": { diff --git a/packages/genui/a2ui-catalog-extractor/package.json b/packages/genui/a2ui-catalog-extractor/package.json index 29e0a60ba8..61b73fe4f1 100644 --- a/packages/genui/a2ui-catalog-extractor/package.json +++ b/packages/genui/a2ui-catalog-extractor/package.json @@ -34,6 +34,7 @@ "scripts": { "api-extractor": "node ../scripts/run-api-extractor.mjs", "build": "tsc --project tsconfig.build.json", + "build:api": "tsc --project tsconfig.build.json", "test": "rstest" }, "dependencies": { diff --git a/packages/genui/a2ui-catalog-extractor/turbo.json b/packages/genui/a2ui-catalog-extractor/turbo.json index b767a4943d..294effadb6 100644 --- a/packages/genui/a2ui-catalog-extractor/turbo.json +++ b/packages/genui/a2ui-catalog-extractor/turbo.json @@ -19,6 +19,20 @@ "outputs": [ "dist/**" ] + }, + "build:api": { + "dependsOn": [], + "inputs": [ + "src/**", + "bin/**", + "package.json", + "rslib.config.ts", + "tsconfig.build.json", + "tsconfig.json" + ], + "outputs": [ + "dist/**" + ] } } } diff --git a/packages/genui/a2ui-prompt/package.json b/packages/genui/a2ui-prompt/package.json index 3b9b1c7690..c98ad3045d 100644 --- a/packages/genui/a2ui-prompt/package.json +++ b/packages/genui/a2ui-prompt/package.json @@ -23,7 +23,8 @@ ], "scripts": { "api-extractor": "node ../scripts/run-api-extractor.mjs", - "build": "rslib build" + "build": "rslib build", + "build:api": "rslib build" }, "devDependencies": { "@microsoft/api-extractor": "catalog:", diff --git a/packages/genui/a2ui-prompt/turbo.json b/packages/genui/a2ui-prompt/turbo.json index fa17b63f01..3e395ebab1 100644 --- a/packages/genui/a2ui-prompt/turbo.json +++ b/packages/genui/a2ui-prompt/turbo.json @@ -22,6 +22,23 @@ "outputs": [ "dist/**" ] + }, + "build:api": { + "dependsOn": [], + 
"inputs": [ + "src/**", + "../server/agent/a2ui-catalog.ts", + "../server/agent/a2ui-examples.ts", + "../server/agent/a2ui-prompt.ts", + "../server/agent/catalog/**/*.json", + "package.json", + "rslib.config.ts", + "tsconfig.build.json", + "tsconfig.json" + ], + "outputs": [ + "dist/**" + ] } } } diff --git a/packages/genui/a2ui/package.json b/packages/genui/a2ui/package.json index 6843ae30de..51a81c5ecc 100644 --- a/packages/genui/a2ui/package.json +++ b/packages/genui/a2ui/package.json @@ -139,6 +139,7 @@ "scripts": { "api-extractor": "node ../scripts/run-api-extractor.mjs", "build": "tsc --project tsconfig.build.json && npm run build:catalog", + "build:api": "tsc --project tsconfig.build.json", "build:catalog": "genui a2ui generate catalog --catalog-dir src/catalog --out-dir dist/catalog && node scripts/writeCatalogManifestIndex.js", "test": "rstest" }, diff --git a/packages/genui/a2ui/turbo.json b/packages/genui/a2ui/turbo.json index 413653e6be..d43c628b79 100644 --- a/packages/genui/a2ui/turbo.json +++ b/packages/genui/a2ui/turbo.json @@ -16,6 +16,19 @@ "dist/**" ] }, + "build:api": { + "dependsOn": [], + "inputs": [ + "src/**", + "styles/**", + "tsconfig.json", + "tsconfig.build.json", + "package.json" + ], + "outputs": [ + "dist/**" + ] + }, "api-extractor": { "dependsOn": [ "//#build", diff --git a/packages/genui/openui/package.json b/packages/genui/openui/package.json index 7006692b94..c90dc1084f 100644 --- a/packages/genui/openui/package.json +++ b/packages/genui/openui/package.json @@ -27,7 +27,8 @@ ], "scripts": { "api-extractor": "node ../scripts/run-api-extractor.mjs", - "build": "tsc -p tsconfig.build.json && cp src/core/renderer.css dist/core/renderer.css" + "build": "tsc -p tsconfig.build.json && cp src/core/renderer.css dist/core/renderer.css", + "build:api": "tsc -p tsconfig.build.json" }, "dependencies": { "@openuidev/lang-core": "^0.2.4", diff --git a/packages/genui/openui/turbo.json b/packages/genui/openui/turbo.json index 
9473179be9..2ee49082bf 100644 --- a/packages/genui/openui/turbo.json +++ b/packages/genui/openui/turbo.json @@ -15,6 +15,18 @@ "outputs": [ "dist/**" ] + }, + "build:api": { + "dependsOn": [], + "inputs": [ + "src/**", + "tsconfig.json", + "tsconfig.build.json", + "package.json" + ], + "outputs": [ + "dist/**" + ] } } } diff --git a/packages/genui/package.json b/packages/genui/package.json index 72efaada61..9058a6ff7d 100644 --- a/packages/genui/package.json +++ b/packages/genui/package.json @@ -95,7 +95,7 @@ ], "scripts": { "api-extractor": "node scripts/run-api-extractor.mjs", - "build": "pnpm run clean && pnpm --dir a2ui-catalog-extractor build && pnpm --dir a2ui-prompt build && pnpm --dir openui build && pnpm --dir a2ui build && tsc --project tsconfig.build.json", + "build": "pnpm run clean && tsc --project tsconfig.build.json", "clean": "node -e \"require('node:fs').rmSync('dist',{recursive:true,force:true});\"" }, "dependencies": { diff --git a/packages/genui/turbo.json b/packages/genui/turbo.json index 6c7c8c96e9..f25fb8c151 100644 --- a/packages/genui/turbo.json +++ b/packages/genui/turbo.json @@ -35,6 +35,16 @@ "a2ui-prompt/dist/**", "openui/dist/**" ] + }, + "api-extractor": { + "dependsOn": [ + "//#build", + "@lynx-js/genui-a2ui#build:api", + "@lynx-js/genui-a2ui-catalog-extractor#build:api", + "@lynx-js/genui-a2ui-prompt#build:api", + "@lynx-js/genui-openui#build:api" + ], + "cache": false } } } diff --git a/packages/genui/ui-judge/README.md b/packages/genui/ui-judge/README.md index f25694f163..042a66087b 100644 --- a/packages/genui/ui-judge/README.md +++ b/packages/genui/ui-judge/README.md @@ -25,6 +25,26 @@ test('judges generated UI', async ({ page }) => { }); ``` +`judgeAndroidAgent` judges an Android Lynx screen through a Kitten-Lynx page. +Callers own the Kitten-Lynx device/app lifecycle, including connection, +navigation, and teardown. The judge reads `page.url()` for the returned JSON +object, mirroring `judgePage`. 
+ +```ts +import { Lynx } from '@lynx-js/kitten-lynx-test-infra'; +import { judgeAndroidAgent } from '@lynx-js/ui-judge'; + +const lynx = await Lynx.connect({ appPackage: 'com.lynx.explorer' }); +const page = await lynx.newPage(); +await page.goto('http://localhost:8080/main.lynx.bundle'); + +const result = await judgeAndroidAgent({ + page, + task: 'The Lynx app should show a checkout confirmation screen.', + steps: ['Dismiss permission dialog if it appears.'], +}); +``` + When `dimension` is omitted, `judgePage` keeps the legacy `visual-correctness` prompt. GEQI scoring can pass one of these dimensions: diff --git a/packages/genui/ui-judge/etc/ui-judge.api.md b/packages/genui/ui-judge/etc/ui-judge.api.md index 39692528bd..90d08af860 100644 --- a/packages/genui/ui-judge/etc/ui-judge.api.md +++ b/packages/genui/ui-judge/etc/ui-judge.api.md @@ -6,6 +6,29 @@ import type { Page } from '@playwright/test'; +// Warning: (ae-missing-release-tag) "judgeAndroidAgent" is part of the package's API, but it is missing a release tag (@alpha, @beta, @public, or @internal) +// +// @public (undocumented) +export function judgeAndroidAgent(options: JudgeAndroidAgentOptions): Promise; + +// Warning: (ae-missing-release-tag) "JudgeAndroidAgentOptions" is part of the package's API, but it is missing a release tag (@alpha, @beta, @public, or @internal) +// +// @public (undocumented) +export interface JudgeAndroidAgentOptions { + // (undocumented) + dimension?: UiJudgeDimension; + // (undocumented) + page: KittenLynxJudgePage; + // (undocumented) + reference?: string; + // (undocumented) + steps?: string[]; + // (undocumented) + task: string; + // (undocumented) + timeoutMs?: number; +} + // Warning: (ae-missing-release-tag) "judgePage" is part of the package's API, but it is missing a release tag (@alpha, @beta, @public, or @internal) // // @public (undocumented) @@ -29,6 +52,20 @@ export interface JudgePageOptions { timeoutMs?: number; } +// Warning: (ae-missing-release-tag) 
"KittenLynxJudgePage" is part of the package's API, but it is missing a release tag (@alpha, @beta, @public, or @internal) +// +// @public (undocumented) +export interface KittenLynxJudgePage { + // (undocumented) + screenshot(options?: { + format?: 'jpeg' | 'png' | 'webp'; + path?: string; + quality?: number; + }): Promise; + // (undocumented) + url(): string; +} + // Warning: (ae-missing-release-tag) "UiJudgeDimension" is part of the package's API, but it is missing a release tag (@alpha, @beta, @public, or @internal) // // @public (undocumented) diff --git a/packages/genui/ui-judge/package.json b/packages/genui/ui-judge/package.json index 261f83c58b..56897666cc 100644 --- a/packages/genui/ui-judge/package.json +++ b/packages/genui/ui-judge/package.json @@ -21,18 +21,23 @@ "scripts": { "api-extractor": "node ../scripts/run-api-extractor.mjs", "build": "rslib build", - "test": "pnpm --dir ../a2ui-catalog-extractor build && pnpm --dir ../a2ui build && playwright test" + "test": "pnpm run test:playwright", + "test:android": "vitest run --config vitest.config.ts", + "test:playwright": "playwright test" }, "dependencies": { "@lynx-js/genui": "workspace:*", + "@midscene/core": "^1.8.0", "@midscene/web": "^1.8.0", "@playwright/test": "^1.58.2" }, "devDependencies": { "@lynx-js/genui-a2ui": "workspace:*", "@lynx-js/genui-a2ui-catalog-extractor": "workspace:*", + "@lynx-js/kitten-lynx-test-infra": "workspace:*", "@microsoft/api-extractor": "catalog:", - "@types/node": "^24.10.13" + "@types/node": "^24.10.13", + "vitest": "^3.2.4" }, "engines": { "node": ">=22" diff --git a/packages/genui/ui-judge/playwright.config.ts b/packages/genui/ui-judge/playwright.config.ts index 411056125e..cf52aed22f 100644 --- a/packages/genui/ui-judge/playwright.config.ts +++ b/packages/genui/ui-judge/playwright.config.ts @@ -6,6 +6,7 @@ import type { PlaywrightTestConfig } from '@playwright/test'; const config: PlaywrightTestConfig = defineConfig({ testDir: './tests', + testIgnore: 
'**/*.vitest.spec.ts', timeout: 180_000, fullyParallel: false, workers: 1, diff --git a/packages/genui/ui-judge/rslib.config.ts b/packages/genui/ui-judge/rslib.config.ts index 31d66ff982..45f478c40b 100644 --- a/packages/genui/ui-judge/rslib.config.ts +++ b/packages/genui/ui-judge/rslib.config.ts @@ -12,6 +12,7 @@ const config: RslibConfig = defineConfig({ entry: { index: './src/index.ts', }, + tsconfigPath: './tsconfig.build.json', }, }); diff --git a/packages/genui/ui-judge/src/index.ts b/packages/genui/ui-judge/src/index.ts index b35ad65cbb..c3adba85cc 100644 --- a/packages/genui/ui-judge/src/index.ts +++ b/packages/genui/ui-judge/src/index.ts @@ -1,6 +1,16 @@ // Copyright 2026 The Lynx Authors. All rights reserved. // Licensed under the Apache License Version 2.0 that can be found in the // LICENSE file in the root directory of this source tree. +import { setTimeout as sleep } from 'node:timers/promises'; + +import type { DeviceAction, Size } from '@midscene/core'; +import { Agent as MidsceneAgent } from '@midscene/core/agent'; +import { + defineActionSwipe, + defineActionTap, + normalizeMobileSwipeParam, +} from '@midscene/core/device'; +import type { AbstractInterface } from '@midscene/core/device'; import { PlaywrightAgent } from '@midscene/web/playwright'; import type { Page } from '@playwright/test'; @@ -19,6 +29,22 @@ export type UiJudgeDimension = export type UiJudgeScore = 0 | 1 | 2 | 3 | 4 | 5; +interface MidsceneJudgeAgent { + aiAct( + step: string, + options?: { abortSignal?: AbortSignal }, + ): Promise; + aiNumber( + prompt: string, + options?: MidsceneJudgeQueryOptions, + ): Promise; +} + +interface MidsceneJudgeQueryOptions { + domIncluded?: boolean | 'visible-only'; + screenshotIncluded?: boolean; +} + export interface JudgePageOptions { dimension?: UiJudgeDimension; page: Page; @@ -28,6 +54,24 @@ export interface JudgePageOptions { timeoutMs?: number; } +export interface KittenLynxJudgePage { + screenshot(options?: { + format?: 'jpeg' | 'png' | 
'webp'; + path?: string; + quality?: number; + }): Promise; + url(): string; +} + +export interface JudgeAndroidAgentOptions { + dimension?: UiJudgeDimension; + page: KittenLynxJudgePage; + reference?: string; + steps?: string[]; + task: string; + timeoutMs?: number; +} + export interface UiJudgeError { message: string; } @@ -40,20 +84,27 @@ export interface UiJudgeResult { url: string; } -interface NormalizedJudgePageOptions { +interface NormalizedJudgeOptions { dimension: UiJudgeDimension; - page: Page; reference?: string; steps: string[]; task: string; timeoutMs: number; } +interface NormalizedJudgePageOptions extends NormalizedJudgeOptions { + page: Page; +} + +interface NormalizedJudgeAndroidAgentOptions extends NormalizedJudgeOptions { + page: KittenLynxJudgePage; +} + export async function judgePage( options: JudgePageOptions, ): Promise { try { - const normalized = normalizeOptions(options); + const normalized = normalizeJudgePageOptions(options); const score = await judgePageUnsafe(normalized); return { dimension: normalized.dimension, @@ -72,6 +123,29 @@ export async function judgePage( } } +export async function judgeAndroidAgent( + options: JudgeAndroidAgentOptions, +): Promise { + try { + const normalized = normalizeJudgeAndroidAgentOptions(options); + const score = await judgeAndroidAgentUnsafe(normalized); + return { + dimension: normalized.dimension, + score, + steps: normalized.steps, + url: normalized.page.url(), + }; + } catch (error) { + return { + dimension: getResultDimension(options?.dimension), + error: { message: toErrorMessage(error) }, + score: 0, + steps: normalizeSteps(options?.steps), + url: getKittenLynxPageUrl(options?.page), + }; + } +} + async function judgePageUnsafe( options: NormalizedJudgePageOptions, ): Promise { @@ -83,26 +157,36 @@ async function judgePageUnsafe( }); try { - for (const step of options.steps) { - const abortController = new AbortController(); - await withAbortableTimeout( - agent.aiAct(step, { abortSignal: 
abortController.signal }), - options.timeoutMs, - abortController, - `Timed out while running Midscene step: ${step}`, - ); - } - - const rawScore = await withTimeout( - agent.aiNumber(buildJudgePrompt(options), { + return await judgeWithAgentUnsafe(agent, options, { + scoreOptions: { domIncluded: 'visible-only', screenshotIncluded: true, - }), - options.timeoutMs, - 'Timed out while asking Midscene for a numeric score.', - ); + }, + }); + } finally { + await agent.destroy().catch(() => { + // Keep the original action or scoring error visible. + }); + } +} - return normalizeScore(rawScore); +async function judgeAndroidAgentUnsafe( + options: NormalizedJudgeAndroidAgentOptions, +): Promise { + const agent = new MidsceneAgent( + new KittenLynxMidscenePage(options.page) as AbstractInterface, + { + autoPrintReportMsg: false, + generateReport: false, + }, + ); + + try { + return await judgeWithAgentUnsafe(agent, options, { + scoreOptions: { + screenshotIncluded: true, + }, + }); } finally { await agent.destroy().catch(() => { // Keep the original action or scoring error visible. 
@@ -110,24 +194,78 @@ async function judgePageUnsafe( } } -function normalizeOptions( +async function judgeWithAgentUnsafe( + agent: MidsceneJudgeAgent, + options: NormalizedJudgeOptions, + settings: { scoreOptions: MidsceneJudgeQueryOptions }, +): Promise { + for (const step of options.steps) { + const abortController = new AbortController(); + await withAbortableTimeout( + agent.aiAct(step, { abortSignal: abortController.signal }), + options.timeoutMs, + abortController, + `Timed out while running Midscene step: ${step}`, + ); + } + + const rawScore = await withTimeout( + agent.aiNumber(buildJudgePrompt(options), settings.scoreOptions), + options.timeoutMs, + 'Timed out while asking Midscene for a numeric score.', + ); + + return normalizeScore(rawScore); +} + +function normalizeJudgePageOptions( options: JudgePageOptions, ): NormalizedJudgePageOptions { if (!options?.page) { throw new Error('judgePage requires a Playwright page.'); } + return { + ...normalizeJudgeBaseOptions(options, 'judgePage'), + page: options.page, + }; +} + +function normalizeJudgeAndroidAgentOptions( + options: JudgeAndroidAgentOptions, +): NormalizedJudgeAndroidAgentOptions { + if (!isKittenLynxPage(options?.page)) { + throw new Error('judgeAndroidAgent requires a Kitten-Lynx page.'); + } + + const normalized: NormalizedJudgeAndroidAgentOptions = { + ...normalizeJudgeBaseOptions(options, 'judgeAndroidAgent'), + page: options.page, + }; + + return normalized; +} + +function normalizeJudgeBaseOptions( + options: { + dimension?: UiJudgeDimension; + reference?: string; + steps?: string[]; + task: string; + timeoutMs?: number; + }, + apiName: string, +): NormalizedJudgeOptions { const task = typeof options.task === 'string' ? 
options.task.trim() : ''; if (!task) { - throw new Error('judgePage requires a non-empty task.'); + throw new Error(`${apiName} requires a non-empty task.`); } - const normalized: NormalizedJudgePageOptions = { - dimension: normalizeDimension(options.dimension), - page: options.page, + const normalized: NormalizedJudgeOptions = { + dimension: normalizeDimension(options.dimension, apiName), steps: normalizeSteps(options.steps), task, - timeoutMs: normalizeTimeout(options.timeoutMs), + timeoutMs: normalizeTimeout(options.timeoutMs, apiName), }; const reference = options.reference?.trim(); @@ -140,12 +278,13 @@ function normalizeOptions( function normalizeDimension( dimension: UiJudgeDimension | undefined, + apiName = 'judgePage', ): UiJudgeDimension { if (dimension === undefined) return DEFAULT_DIMENSION; if (isKnownDimension(dimension)) return dimension; throw new Error( - `judgePage dimension must be one of: ${getDimensionNames().join(', ')}.`, + `${apiName} dimension must be one of: ${getDimensionNames().join(', ')}.`, ); } @@ -173,14 +312,231 @@ function normalizeSteps(steps: string[] | undefined): string[] { .filter((step) => step.length > 0); } -function normalizeTimeout(timeoutMs: number | undefined): number { +function normalizeTimeout( + timeoutMs: number | undefined, + apiName = 'judgePage', +): number { if (timeoutMs === undefined) return DEFAULT_TIMEOUT_MS; if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) { - throw new Error('judgePage timeoutMs must be a positive finite number.'); + throw new Error(`${apiName} timeoutMs must be a positive finite number.`); } return timeoutMs; } +interface KittenLynxChannel { + send(method: string, params: Record): Promise; +} + +type KittenLynxViewWithChannel = KittenLynxJudgePage & { + _channel?: KittenLynxChannel; +}; + +type TouchEventType = 'mousePressed' | 'mouseMoved' | 'mouseReleased'; + +interface TouchPoint { + x: number; + y: number; +} + +interface ScreenshotSnapshot { + base64: string; + size: Size; +} + 
+class KittenLynxMidscenePage { + interfaceType = 'lynx-android'; + private screenshotSnapshot: Promise | undefined; + + constructor(private readonly page: KittenLynxJudgePage) {} + + actionSpace(): DeviceAction[] { + return [ + defineActionTap(async ({ locate }) => { + await this.tapAt({ + x: locate.center[0], + y: locate.center[1], + }); + }), + defineActionSwipe(async (param) => { + const swipe = normalizeMobileSwipeParam(param, await this.size()); + for (let index = 0; index < swipe.repeatCount; index++) { + await this.swipe(swipe.startPoint, swipe.endPoint, swipe.duration); + } + }), + ]; + } + + async screenshotBase64(): Promise { + const screenshot = await this.captureScreenshot(); + return screenshot.base64; + } + + async size(): Promise { + const screenshot = await this.captureScreenshot(); + return screenshot.size; + } + + url(): string { + return this.page.url(); + } + + describe(): string { + return this.page.url(); + } + + beforeInvokeAction(): Promise { + this.screenshotSnapshot = undefined; + return Promise.resolve(); + } + + afterInvokeAction(): Promise { + this.screenshotSnapshot = undefined; + return Promise.resolve(); + } + + destroy(): Promise { + this.screenshotSnapshot = undefined; + return Promise.resolve(); + } + + private async captureScreenshot(): Promise { + this.screenshotSnapshot ??= this.page.screenshot().then( + (buffer: Buffer) => { + const format = getImageFormat(buffer); + return { + base64: `data:image/${format};base64,${buffer.toString('base64')}`, + size: getImageSize(buffer, format), + }; + }, + ).catch((error: unknown) => { + this.screenshotSnapshot = undefined; + throw error; + }); + + return await this.screenshotSnapshot; + } + + private async tapAt(point: TouchPoint): Promise { + await this.touch('mousePressed', point); + await sleep(50); + await this.touch('mouseReleased', point); + } + + private async swipe( + startPoint: TouchPoint, + endPoint: TouchPoint, + duration: number, + ): Promise { + await 
this.touch('mousePressed', startPoint); + await sleep(Math.max(0, Math.min(duration, 1000)) / 2); + await this.touch('mouseMoved', endPoint); + await sleep(Math.max(0, Math.min(duration, 1000)) / 2); + await this.touch('mouseReleased', endPoint); + } + + private async touch(type: TouchEventType, point: TouchPoint): Promise { + await this.getChannel().send('Input.emulateTouchFromMouseEvent', { + button: 'left', + type, + x: point.x, + y: point.y, + }); + } + + private getChannel(): KittenLynxChannel { + const channel = (this.page as KittenLynxViewWithChannel)._channel; + if (!channel) { + throw new Error( + 'Kitten-Lynx page is not attached yet. Call page.goto() before judgeAndroidAgent().', + ); + } + + return channel; + } +} + +function isKittenLynxPage(page: unknown): page is KittenLynxJudgePage { + return typeof page === 'object' + && page !== null + && 'screenshot' in page + && 'url' in page + && typeof page.screenshot === 'function' + && typeof page.url === 'function'; +} + +function getKittenLynxPageUrl(page: KittenLynxJudgePage | undefined): string { + try { + return isKittenLynxPage(page) ? 
page.url() : ''; + } catch { + return ''; + } +} + +function getImageFormat(buffer: Buffer): 'jpeg' | 'png' { + if ( + buffer.length >= 8 + && buffer[0] === 0x89 + && buffer[1] === 0x50 + && buffer[2] === 0x4e + && buffer[3] === 0x47 + ) { + return 'png'; + } + + if (buffer.length >= 2 && buffer[0] === 0xff && buffer[1] === 0xd8) { + return 'jpeg'; + } + + throw new Error('Unsupported Kitten-Lynx screenshot format.'); +} + +function getImageSize(buffer: Buffer, format: 'jpeg' | 'png'): Size { + if (format === 'png') { + return { + height: buffer.readUInt32BE(20), + width: buffer.readUInt32BE(16), + }; + } + + let offset = 2; + while (offset < buffer.length) { + if (buffer[offset] !== 0xff) { + break; + } + + if (offset + 4 >= buffer.length) { + break; + } + + const marker = buffer[offset + 1]; + if (marker === undefined) { + break; + } + + const length = buffer.readUInt16BE(offset + 2); + const isStartOfFrame = marker >= 0xc0 + && marker <= 0xcf + && marker !== 0xc4 + && marker !== 0xc8 + && marker !== 0xcc; + + if (isStartOfFrame) { + if (offset + 8 >= buffer.length) { + break; + } + + return { + height: buffer.readUInt16BE(offset + 5), + width: buffer.readUInt16BE(offset + 7), + }; + } + + offset += 2 + length; + } + + throw new Error('Unable to read Kitten-Lynx screenshot dimensions.'); +} + interface JudgeDimensionPromptDefinition { criteria: readonly string[]; focus: string; @@ -256,7 +612,7 @@ const JUDGE_DIMENSION_PROMPTS: Record< }; function buildJudgePrompt( - options: NormalizedJudgePageOptions, + options: NormalizedJudgeOptions, ): string { const dimensionPrompt = JUDGE_DIMENSION_PROMPTS[options.dimension]; const reference = options.reference diff --git a/packages/genui/ui-judge/tests/judge-android-agent.vitest.spec.ts b/packages/genui/ui-judge/tests/judge-android-agent.vitest.spec.ts new file mode 100644 index 0000000000..895840a4d5 --- /dev/null +++ b/packages/genui/ui-judge/tests/judge-android-agent.vitest.spec.ts @@ -0,0 +1,345 @@ +// Copyright 2026 
The Lynx Authors. All rights reserved.
+// Licensed under the Apache License Version 2.0 that can be found in the
+// LICENSE file in the root directory of this source tree.
+import { spawn } from 'node:child_process';
+import type { ChildProcess } from 'node:child_process';
+import { dirname, resolve } from 'node:path';
+import { setTimeout as sleep } from 'node:timers/promises';
+import { fileURLToPath } from 'node:url';
+
+import { afterAll, beforeAll, describe, expect, it } from 'vitest';
+
+import { Lynx } from '../../../testing-library/kitten-lynx/src/index.js';
+import type { KittenLynxView } from '../../../testing-library/kitten-lynx/src/index.js';
+import { judgeAndroidAgent } from '../src/index.js';
+
+// Port and URL polled on the host; the same port is reverse-forwarded to the
+// device with `adb reverse` before the page navigates to the URL.
+const TEST_FIXTURE_PORT = 3001;
+const TEST_FIXTURE_URL =
+  `http://127.0.0.1:${TEST_FIXTURE_PORT}/react-example.lynx.bundle`;
+// The device-backed suite only runs when explicitly opted in.
+const RUN_ANDROID_INTEGRATION =
+  process.env['UI_JUDGE_ANDROID_INTEGRATION'] === '1';
+const HELPER_DIR = dirname(fileURLToPath(import.meta.url));
+const WORKSPACE_ROOT = resolve(HELPER_DIR, '../../../..');
+const KITTEN_LYNX_CWD = resolve(
+  WORKSPACE_ROOT,
+  'packages/testing-library/kitten-lynx',
+);
+const READY_TIMEOUT_MS = 120_000;
+const FETCH_TIMEOUT_MS = 2_500;
+const POLL_INTERVAL_MS = 250;
+const DISPOSE_TIMEOUT_MS = 5_000;
+// Maximum number of characters retained per captured output stream.
+const LOG_LIMIT = 12_000;
+
+// NOTE(review): the generic arguments were lost in this copy of the patch;
+// `ReturnType<typeof Lynx.connect>` is reconstructed from how `Lynx.connect`
+// is used below. Confirm against the kitten-lynx exports.
+type LynxConnection = Awaited<ReturnType<typeof Lynx.connect>>;
+
+/** Exit details reported by a child process. */
+interface ExitState {
+  code: number | null;
+  signal: NodeJS.Signals | null;
+}
+
+/** Handle for the fixture server spawned by this spec. */
+interface FixtureServer {
+  /** Stops the server process if it has not exited yet. */
+  dispose(): Promise<void>;
+  /** Returns the retained stdout/stderr of the server process. */
+  getLogs(): string;
+}
+
+/**
+ * Accumulates process output while keeping only the trailing `LOG_LIMIT`
+ * characters, so error messages carry recent output without growing unbounded.
+ */
+class BoundedLog {
+  #value = '';
+
+  /** Appends a chunk; Buffers are decoded as UTF-8, anything else is stringified. */
+  append(chunk: unknown): void {
+    this.#value += Buffer.isBuffer(chunk)
+      ? chunk.toString('utf8')
+      : String(chunk);
+    if (this.#value.length > LOG_LIMIT) {
+      this.#value = this.#value.slice(-LOG_LIMIT);
+    }
+  }
+
+  toString(): string {
+    return this.#value;
+  }
+}
+
+describe('judgeAndroidAgent validation', () => {
+  it('returns a JSON error when a Kitten-Lynx page is missing', async () => {
+    const result = await judgeAndroidAgent({
+      dimension: 'consistency-standards',
+      // Deliberately bypass the type check to exercise runtime validation.
+      page: undefined as never,
+      task: 'The Lynx app should show a checkout confirmation screen.',
+      timeoutMs: 3_000,
+    });
+
+    expect(result).toMatchObject({
+      dimension: 'consistency-standards',
+      score: 0,
+      steps: [],
+      url: '',
+    });
+    expect(result.error?.message).toContain('Kitten-Lynx page');
+  });
+});
+
+describe.skipIf(!RUN_ANDROID_INTEGRATION)(
+  'judgeAndroidAgent Android integration',
+  () => {
+    let fixtureServer: FixtureServer | undefined;
+    let lynx: LynxConnection | undefined;
+    let page: KittenLynxView | undefined;
+
+    beforeAll(async () => {
+      fixtureServer = await startKittenLynxFixtureServer();
+      await reverseFixturePort();
+
+      const deviceId = getAndroidDeviceId();
+      lynx = await Lynx.connect(deviceId ? { deviceId } : undefined);
+      page = await lynx.newPage();
+      await page.goto(TEST_FIXTURE_URL, { timeout: 15_000 });
+    }, 90_000);
+
+    afterAll(async () => {
+      // Always stop the fixture server, even when closing the connection
+      // throws; otherwise the server process would outlive the test run.
+      try {
+        await lynx?.close();
+      } finally {
+        await fixtureServer?.dispose();
+      }
+    }, 30_000);
+
+    it('accepts a Kitten-Lynx newPage result as the page option', async () => {
+      if (!page) {
+        throw new Error('Kitten-Lynx page was not created.');
+      }
+
+      const result = await judgeAndroidAgent({
+        page,
+        task: '',
+        timeoutMs: 3_000,
+      });
+
+      expect(result).toMatchObject({
+        dimension: 'visual-correctness',
+        score: 0,
+        steps: [],
+        url: TEST_FIXTURE_URL,
+      });
+      expect(result.error?.message).toContain(
+        'judgeAndroidAgent requires a non-empty task.',
+      );
+    });
+  },
+);
+
+/**
+ * Spawns `serve` through pnpm in the kitten-lynx package directory and waits
+ * until `TEST_FIXTURE_URL` responds successfully.
+ *
+ * @returns A handle that can stop the server and expose its captured logs.
+ * @throws If the process fails to spawn, exits before becoming ready, or does
+ * not become ready within `READY_TIMEOUT_MS`. The process is stopped before
+ * the error is rethrown.
+ */
+async function startKittenLynxFixtureServer(): Promise<FixtureServer> {
+  const stdout = new BoundedLog();
+  const stderr = new BoundedLog();
+  let spawnError: Error | null = null;
+  let exitState: ExitState | null = null;
+
+  // Off Windows the child is spawned detached so that disposal can signal
+  // the whole process group through a negative pid.
+  const detached = process.platform !== 'win32';
+  const pnpmCommand = getPnpmCommand();
+  const child = spawn(pnpmCommand.command, [...pnpmCommand.args, 'serve'], {
+    cwd: KITTEN_LYNX_CWD,
+    detached,
+    env: {
+      ...process.env,
+      NODE_ENV: 'development',
+    },
+    stdio: ['ignore', 'pipe', 'pipe'],
+  });
+
+  child.stdout.on('data', (chunk) => stdout.append(chunk));
+  child.stderr.on('data', (chunk) => stderr.append(chunk));
+  child.once('error', (error) => {
+    spawnError = error;
+  });
+
+  const exitPromise = new Promise<void>((resolveExit) => {
+    child.once('exit', (code, signal) => {
+      exitState = { code, signal };
+      resolveExit();
+    });
+  });
+
+  // Reports a spawn failure or premature exit, or null while the process is
+  // still running.
+  const getProcessError = (): Error | null => {
+    if (spawnError) {
+      return new Error(
+        `Failed to start the Kitten-Lynx fixture server: ${spawnError.message}\n\n${
+          formatLogs(stdout, stderr)
+        }`,
+      );
+    }
+    if (exitState) {
+      return new Error(
+        `Kitten-Lynx fixture server exited before it became ready. code=${
+          String(exitState.code)
+        } signal=${String(exitState.signal)}\n\n${formatLogs(stdout, stderr)}`,
+      );
+    }
+    return null;
+  };
+
+  try {
+    await waitForFixtureReady(
+      getProcessError,
+      () => formatLogs(stdout, stderr),
+    );
+  } catch (error) {
+    if (!exitState) {
+      await disposeChildProcess(child, detached, exitPromise);
+    }
+    throw error;
+  }
+
+  return {
+    async dispose() {
+      if (!exitState) {
+        await disposeChildProcess(child, detached, exitPromise);
+      }
+    },
+    getLogs() {
+      return formatLogs(stdout, stderr);
+    },
+  };
+}
+
+/**
+ * Chooses how to invoke pnpm: when `npm_execpath` is set (presumably by the
+ * package manager that launched the test run), that script is run with the
+ * current Node executable; otherwise `pnpm` is resolved from `PATH`.
+ */
+function getPnpmCommand(): { args: string[]; command: string } {
+  const npmExecPath = process.env['npm_execpath'];
+  if (npmExecPath) {
+    return {
+      args: [npmExecPath],
+      command: process.execPath,
+    };
+  }
+
+  return {
+    args: [],
+    command: 'pnpm',
+  };
+}
+
+/**
+ * Polls `TEST_FIXTURE_URL` every `POLL_INTERVAL_MS` until it responds
+ * successfully.
+ *
+ * @param getProcessError Returns an error once the server process has failed,
+ * so polling stops early instead of running into the timeout.
+ * @param getLogs Optional provider of server output appended to the timeout
+ * error message.
+ * @throws The process error, or a timeout error after `READY_TIMEOUT_MS`.
+ */
+async function waitForFixtureReady(
+  getProcessError: () => Error | null,
+  getLogs: () => string = () => '',
+): Promise<void> {
+  const deadline = Date.now() + READY_TIMEOUT_MS;
+
+  while (Date.now() < deadline) {
+    const processError = getProcessError();
+    if (processError) throw processError;
+
+    if (await fetchOk(TEST_FIXTURE_URL)) {
+      return;
+    }
+
+    await sleep(POLL_INTERVAL_MS);
+  }
+
+  const processError = getProcessError();
+  if (processError) throw processError;
+
+  const logs = getLogs();
+  throw new Error(
+    `Timed out waiting for the Kitten-Lynx fixture server at ${TEST_FIXTURE_URL}.${
+      logs ? `\n\n${logs}` : ''
+    }`,
+  );
+}
+
+/**
+ * Runs `adb reverse` for the fixture port on the configured device, or on
+ * every device listed by adb when none is configured.
+ */
+async function reverseFixturePort(): Promise<void> {
+  const configuredDeviceId = getAndroidDeviceId();
+  const deviceIds = configuredDeviceId
+    ? [configuredDeviceId]
+    : await listAdbDevices();
+
+  for (const deviceId of deviceIds) {
+    await runCommand('adb', [
+      '-s',
+      deviceId,
+      'reverse',
+      `tcp:${TEST_FIXTURE_PORT}`,
+      `tcp:${TEST_FIXTURE_PORT}`,
+    ]);
+  }
+}
+
+/**
+ * Returns the serials that `adb devices` reports in the `device` state.
+ *
+ * @throws If no such device is listed.
+ */
+async function listAdbDevices(): Promise<string[]> {
+  const { stdout } = await runCommand('adb', ['devices']);
+  const devices = stdout.split('\n').flatMap((line) => {
+    const [serial, state] = line.trim().split(/\s+/);
+    // Header, blank, `offline` and `unauthorized` rows are dropped here.
+    return serial && state === 'device' ? [serial] : [];
+  });
+
+  if (devices.length === 0) {
+    throw new Error('No authorized Android device found through adb.');
+  }
+
+  return devices;
+}
+
+/**
+ * Fetches `url` with a `FETCH_TIMEOUT_MS` abort deadline.
+ *
+ * @returns `true` for an OK response; `false` for any other response, network
+ * failure, or timeout.
+ */
+async function fetchOk(url: string): Promise<boolean> {
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
+  try {
+    const response = await fetch(url, {
+      cache: 'no-store',
+      signal: controller.signal,
+    });
+    return response.ok;
+  } catch {
+    // Connection refused / aborted: the server is simply not ready yet.
+    return false;
+  } finally {
+    clearTimeout(timeout);
+  }
+}
+
+/**
+ * Runs a command to completion and returns its captured stdout.
+ *
+ * @throws If the command cannot be spawned or finishes with a non-zero exit
+ * code; the message includes the captured output.
+ */
+async function runCommand(
+  command: string,
+  args: string[],
+): Promise<{ stdout: string }> {
+  const child = spawn(command, args, {
+    stdio: ['ignore', 'pipe', 'pipe'],
+  });
+  const stdout = new BoundedLog();
+  const stderr = new BoundedLog();
+
+  child.stdout.on('data', (chunk) => stdout.append(chunk));
+  child.stderr.on('data', (chunk) => stderr.append(chunk));
+
+  const exitState = await new Promise<ExitState>((resolveExit, rejectExit) => {
+    // Without an 'error' listener a missing binary (e.g. adb not on PATH)
+    // raises an unhandled 'error' event and this promise never settles.
+    child.once('error', (error) => {
+      rejectExit(
+        new Error(
+          `Failed to run command: ${command} ${
+            args.join(' ')
+          }. ${error.message}`,
+          { cause: error },
+        ),
+      );
+    });
+    // 'close' fires after the stdio streams have ended, so the captured
+    // output is complete; 'exit' can fire while data is still buffered.
+    child.once('close', (code, signal) => resolveExit({ code, signal }));
+  });
+
+  if (exitState.code === 0) {
+    return { stdout: stdout.toString() };
+  }
+
+  throw new Error(
+    `Command failed: ${command} ${args.join(' ')}. code=${
+      String(exitState.code)
+    } signal=${String(exitState.signal)}\n\n${formatLogs(stdout, stderr)}`,
+  );
+}
+
+/**
+ * Resolves to `true` once `exitPromise` settles, or `false` after `timeoutMs`.
+ * The timer is cancelled as soon as the race is decided so it neither delays
+ * process shutdown nor fires after the child has already exited.
+ */
+async function waitForExit(
+  exitPromise: Promise<void>,
+  timeoutMs: number,
+): Promise<boolean> {
+  const timer = new AbortController();
+  try {
+    return await Promise.race([
+      exitPromise.then(() => true),
+      sleep(timeoutMs, false, { signal: timer.signal }),
+    ]);
+  } finally {
+    timer.abort();
+  }
+}
+
+/**
+ * Stops a child process: sends SIGTERM, waits up to `DISPOSE_TIMEOUT_MS` for
+ * it to exit, then escalates to SIGKILL and waits once more.
+ *
+ * @param child The process to stop.
+ * @param detached Whether the child was spawned detached; if so the signal is
+ * sent to its process group (negative pid) instead of the single process.
+ * @param exitPromise Resolves when the child's `exit` event has fired.
+ */
+async function disposeChildProcess(
+  child: ChildProcess,
+  detached: boolean,
+  exitPromise: Promise<void>,
+): Promise<void> {
+  const pid = child.pid;
+  if (pid === undefined) return;
+
+  // Signalling a process (group) that is already gone throws; treat that as
+  // "nothing left to stop" rather than failing teardown.
+  const sendSignal = (signal: NodeJS.Signals): boolean => {
+    try {
+      return detached ? process.kill(-pid, signal) : child.kill(signal);
+    } catch {
+      return false;
+    }
+  };
+
+  if (!sendSignal('SIGTERM')) return;
+  if (await waitForExit(exitPromise, DISPOSE_TIMEOUT_MS)) return;
+
+  if (sendSignal('SIGKILL')) {
+    // Bounded wait so teardown cannot hang if the exit event never arrives.
+    await waitForExit(exitPromise, DISPOSE_TIMEOUT_MS);
+  }
+}
+
+/**
+ * Reads the target device serial from `KITTEN_LYNX_DEVICE_ID`, falling back
+ * to `ANDROID_SERIAL`. Empty values count as unset, hence `||` over `??`.
+ */
+function getAndroidDeviceId(): string | undefined {
+  return process.env['KITTEN_LYNX_DEVICE_ID']
+    || process.env['ANDROID_SERIAL']
+    || undefined;
+}
+
+/** Renders both captured streams as one labelled block for error messages. */
+function formatLogs(stdout: BoundedLog, stderr: BoundedLog): string {
+  return `stdout:\n${stdout.toString()}\n\nstderr:\n${stderr.toString()}`;
+}
diff --git a/packages/genui/ui-judge/tsconfig.build.json b/packages/genui/ui-judge/tsconfig.build.json new file mode 100644 index 0000000000..68db80dd87 --- /dev/null +++ b/packages/genui/ui-judge/tsconfig.build.json @@ -0,0 +1,4 @@ +{ + "extends": "./tsconfig.json", + "include": ["src"], +} diff --git a/packages/genui/ui-judge/tsconfig.json b/packages/genui/ui-judge/tsconfig.json index 5a4932ea6a..fcca88c8df 100644 --- a/packages/genui/ui-judge/tsconfig.json +++ b/packages/genui/ui-judge/tsconfig.json @@ -10,6 +10,12 @@ "lib": ["DOM", "ES2022"], "types": ["node"], }, - "include": ["src", "tests", "playwright.config.ts", "rslib.config.ts"], + "include": [ + "src", + "tests", + "playwright.config.ts", + "rslib.config.ts", + "vitest.config.ts", + ], "references": [], } diff --git a/packages/genui/ui-judge/vitest.config.ts b/packages/genui/ui-judge/vitest.config.ts new file mode 100644 index 0000000000..5eeb6f9b60 --- /dev/null +++ b/packages/genui/ui-judge/vitest.config.ts @@ -0,0 +1,10 @@ +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + 
environment: 'node', + hookTimeout: 60_000, + include: ['tests/**/*.vitest.spec.ts'], + testTimeout: 60_000, + }, +}); diff --git a/packages/testing-library/kitten-lynx/src/KittenLynxView.ts b/packages/testing-library/kitten-lynx/src/KittenLynxView.ts index c8b60e62be..6f4bd7d966 100644 --- a/packages/testing-library/kitten-lynx/src/KittenLynxView.ts +++ b/packages/testing-library/kitten-lynx/src/KittenLynxView.ts @@ -15,6 +15,7 @@ const idToKittenLynxView: Record> = {}; export class KittenLynxView { private static incId = 1; private _root?: ElementNode; + private _url = ''; _channel!: CDPChannel; readonly id: number; @@ -197,8 +198,10 @@ export class KittenLynxView { s.url === url || s.url === urlPath || url.endsWith(s.url) || s.url.endsWith(urlPath), ); - if (suffixMatches.length === 1) { - matched = suffixMatches[0]; + if (suffixMatches.length > 0) { + matched = suffixMatches.reduce((latest, session) => + session.session_id > latest.session_id ? session : latest + ); } } @@ -236,6 +239,14 @@ export class KittenLynxView { if (!this._channel) { throw new Error('Failed to attach to session for URL: ' + url); } + this._url = url; + } + + /** + * Returns the last URL successfully loaded by {@link goto}. + */ + url(): string { + return this._url; } /** diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9832e79c09..d9b0b60535 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -848,6 +848,9 @@ importers: '@lynx-js/genui': specifier: workspace:* version: link:.. 
+ '@midscene/core': + specifier: ^1.8.0 + version: 1.8.0(ws@8.20.0) '@midscene/web': specifier: ^1.8.0 version: 1.8.0(@playwright/test@1.58.2)(playwright@1.58.2) @@ -861,12 +864,18 @@ importers: '@lynx-js/genui-a2ui-catalog-extractor': specifier: workspace:* version: link:../a2ui-catalog-extractor + '@lynx-js/kitten-lynx-test-infra': + specifier: workspace:* + version: link:../../testing-library/kitten-lynx '@microsoft/api-extractor': specifier: 'catalog:' version: 7.58.2(@types/node@24.10.13) '@types/node': specifier: ^24.10.13 version: 24.10.13 + vitest: + specifier: ^3.2.4 + version: 3.2.4(@types/debug@4.1.12)(@types/node@24.10.13)(@vitest/ui@3.2.4)(jsdom@27.4.0)(less@4.6.4)(lightningcss@1.31.1)(sass-embedded@1.99.0)(sass@1.99.0)(terser@5.31.6) packages/i18n/i18next-translation-dedupe: devDependencies: