diff --git a/.env.example b/.env.example index 16caa43266..329091edfa 100644 --- a/.env.example +++ b/.env.example @@ -187,3 +187,17 @@ MAX_CONCURRENT_CONVERSATIONS=10 # Maximum concurrent AI conversations (default: # Session Retention # SESSION_RETENTION_DAYS=30 # Delete inactive sessions older than N days (default: 30) + +# Anonymous Telemetry (optional) +# Archon sends anonymous workflow-invocation events to PostHog so maintainers +# can see which workflows get real usage. No PII — workflow name/description + +# platform + Archon version + a random install UUID. No identities, no prompts, +# no paths, no code. See README "Telemetry" for the full list. +# +# Opt out (any one disables telemetry): +# ARCHON_TELEMETRY_DISABLED=1 +# DO_NOT_TRACK=1 # de facto standard +# +# Point at a self-hosted PostHog or a different project: +# POSTHOG_API_KEY=phc_yourKeyHere +# POSTHOG_HOST=https://eu.i.posthog.com # default: https://us.i.posthog.com diff --git a/README.md b/README.md index a346ccbb96..717e2649eb 100644 --- a/README.md +++ b/README.md @@ -315,6 +315,23 @@ Full documentation is available at **[archon.diy](https://archon.diy)**. | [Architecture](https://archon.diy/reference/architecture/) | System design and internals | | [Troubleshooting](https://archon.diy/reference/troubleshooting/) | Common issues and fixes | +## Telemetry + +Archon sends a single anonymous event — `workflow_invoked` — each time a workflow starts, so maintainers can see which workflows get real usage and prioritize accordingly. **No PII, ever.** + +**What's collected:** the workflow name, the workflow description (both authored by you in YAML), the platform that triggered it (`cli`, `web`, `slack`, etc.), the Archon version, and a random install UUID stored at `~/.archon/telemetry-id`. Nothing else. + +**What's *not* collected:** your code, prompts, messages, git remotes, file paths, usernames, tokens, AI output, workflow node details — none of it. 
+ +**Opt out:** set any of these in your environment: + +```bash +ARCHON_TELEMETRY_DISABLED=1 +DO_NOT_TRACK=1 # de facto standard honored by Astro, Bun, Prisma, Nuxt, etc. +``` + +Self-host PostHog or use a different project by setting `POSTHOG_API_KEY` and `POSTHOG_HOST`. + ## Contributing Contributions welcome! See the open [issues](https://github.com/coleam00/Archon/issues) for things to work on. diff --git a/bun.lock b/bun.lock index cf5b5efd7d..8599602c73 100644 --- a/bun.lock +++ b/bun.lock @@ -118,6 +118,7 @@ "dotenv": "^17", "pino": "^9", "pino-pretty": "^13", + "posthog-node": "^5.29.2", }, "peerDependencies": { "typescript": "^5.0.0", @@ -620,6 +621,8 @@ "@pinojs/redact": ["@pinojs/redact@0.4.0", "", {}, "sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg=="], + "@posthog/core": ["@posthog/core@1.25.2", "", {}, "sha512-h2FO7ut/BbfwpAXWpwdDHTzQgUo9ibDFEs6ZO+3cI3KPWQt5XwczK1OLAuPprcjm8T/jl0SH8jSFo5XdU4RbTg=="], + "@radix-ui/number": ["@radix-ui/number@1.1.1", "", {}, "sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g=="], "@radix-ui/primitive": ["@radix-ui/primitive@1.1.3", "", {}, "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg=="], @@ -2010,6 +2013,8 @@ "postgres-interval": ["postgres-interval@1.2.0", "", { "dependencies": { "xtend": "^4.0.0" } }, "sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ=="], + "posthog-node": ["posthog-node@5.29.2", "", { "dependencies": { "@posthog/core": "1.25.2" }, "peerDependencies": { "rxjs": "^7.0.0" }, "optionalPeers": ["rxjs"] }, "sha512-rI7kkF0XqDc0G1qjx+Hb4iuY9NAlL+XQNoGOpnEpRNTUcXvjY6WlsRGZ9m2whgc39emrrYdszi/YT8wZkr2xsg=="], + "powershell-utils": ["powershell-utils@0.1.0", "", {}, "sha512-dM0jVuXJPsDN6DvRpea484tCUaMiXWjuCn++HGTqUWzGDjv5tZkEZldAJ/UMlqRYGFrD/etByo4/xOuC/snX2A=="], "prelude-ls": ["prelude-ls@1.2.1", "", {}, 
"sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g=="], diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 5b66262435..cb8ddd80b0 100755 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -80,6 +80,7 @@ import { checkForUpdate, BUNDLED_IS_BINARY, BUNDLED_VERSION, + shutdownTelemetry, } from '@archon/paths'; import * as git from '@archon/git'; @@ -573,6 +574,9 @@ async function main(): Promise { } return 1; } finally { + // Flush queued telemetry events before the CLI process exits. + // Short-lived CLI commands lose buffered events if shutdown() is skipped. + await shutdownTelemetry(); // Always close database connection await closeDb(); } diff --git a/packages/paths/package.json b/packages/paths/package.json index 19267ebaed..eafd963f57 100644 --- a/packages/paths/package.json +++ b/packages/paths/package.json @@ -16,7 +16,8 @@ "dependencies": { "dotenv": "^17", "pino": "^9", - "pino-pretty": "^13" + "pino-pretty": "^13", + "posthog-node": "^5.29.2" }, "peerDependencies": { "typescript": "^5.0.0" diff --git a/packages/paths/src/index.ts b/packages/paths/src/index.ts index 99a254f4ca..8f067cfeca 100644 --- a/packages/paths/src/index.ts +++ b/packages/paths/src/index.ts @@ -43,3 +43,7 @@ export { parseLatestRelease, } from './update-check'; export type { UpdateCheckResult } from './update-check'; + +// Anonymous telemetry +export { captureWorkflowInvoked, shutdownTelemetry, isTelemetryDisabled } from './telemetry'; +export type { WorkflowInvokedProperties } from './telemetry'; diff --git a/packages/paths/src/telemetry.test.ts b/packages/paths/src/telemetry.test.ts new file mode 100644 index 0000000000..23889fe47d --- /dev/null +++ b/packages/paths/src/telemetry.test.ts @@ -0,0 +1,151 @@ +import { describe, test, expect, beforeEach, afterEach } from 'bun:test'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { existsSync, mkdtempSync, readFileSync, rmSync } from 'fs'; + 
/** Environment variables the telemetry tests read or mutate. */
const ENV_VARS = [
  'ARCHON_HOME',
  'ARCHON_TELEMETRY_DISABLED',
  'DO_NOT_TRACK',
  'POSTHOG_API_KEY',
  'POSTHOG_HOST',
] as const;

/** Snapshot of the tracked variables; `undefined` means "was not set". */
type EnvSnapshot = Record<string, string | undefined>;

/**
 * Capture the current value of every variable in `ENV_VARS`.
 *
 * @returns A snapshot suitable for passing to `restoreEnv`.
 */
function saveEnv(): EnvSnapshot {
  const saved: EnvSnapshot = {};
  for (const key of ENV_VARS) saved[key] = process.env[key];
  return saved;
}

/**
 * Put every variable in `ENV_VARS` back to the state recorded in `saved`.
 *
 * Variables that were unset at snapshot time are deleted rather than assigned,
 * because assigning `undefined` to `process.env` stores the string
 * `"undefined"` instead of removing the key.
 *
 * @param saved - Snapshot previously returned by `saveEnv`.
 */
function restoreEnv(saved: EnvSnapshot): void {
  for (const key of ENV_VARS) {
    const value = saved[key];
    if (value === undefined) {
      delete process.env[key];
    } else {
      process.env[key] = value;
    }
  }
}
resetTelemetryForTests(); + process.env.ARCHON_TELEMETRY_DISABLED = '1'; + }); + + afterEach(() => { + restoreEnv(saved); + resetTelemetryForTests(); + }); + + test('does not throw when telemetry is disabled', () => { + expect(() => { + captureWorkflowInvoked({ + workflowName: 'test-workflow', + workflowDescription: 'A test', + platform: 'cli', + archonVersion: 'dev', + }); + }).not.toThrow(); + }); + + test('shutdownTelemetry is a no-op when never initialized', async () => { + await expect(shutdownTelemetry()).resolves.toBeUndefined(); + }); +}); + +describe('telemetry ID persistence', () => { + let saved: Record; + let tmpHome: string; + + beforeEach(() => { + saved = saveEnv(); + tmpHome = mkdtempSync(join(tmpdir(), 'archon-telemetry-test-')); + process.env.ARCHON_HOME = tmpHome; + // Force-disable actual network capture — we only exercise the ID path. + process.env.ARCHON_TELEMETRY_DISABLED = '1'; + resetTelemetryForTests(); + }); + + afterEach(() => { + restoreEnv(saved); + resetTelemetryForTests(); + rmSync(tmpHome, { recursive: true, force: true }); + }); + + test('calling capture while disabled does not create a telemetry-id file', () => { + captureWorkflowInvoked({ workflowName: 'w' }); + expect(existsSync(join(tmpHome, 'telemetry-id'))).toBe(false); + }); + + test('an existing telemetry-id file is preserved (not overwritten)', async () => { + const { writeFileSync, mkdirSync } = await import('fs'); + const existingId = '11111111-1111-4111-8111-111111111111'; + mkdirSync(tmpHome, { recursive: true }); + writeFileSync(join(tmpHome, 'telemetry-id'), existingId, 'utf8'); + + resetTelemetryForTests(); + + // Direct, synchronous call — no network, no fire-and-forget, no timer. 
+ const resolved = getOrCreateTelemetryId(); + + expect(resolved).toBe(existingId); + const stored = readFileSync(join(tmpHome, 'telemetry-id'), 'utf8').trim(); + expect(stored).toBe(existingId); + }); +}); diff --git a/packages/paths/src/telemetry.ts b/packages/paths/src/telemetry.ts new file mode 100644 index 0000000000..4c68649dab --- /dev/null +++ b/packages/paths/src/telemetry.ts @@ -0,0 +1,246 @@ +/** + * Anonymous PostHog telemetry for Archon. + * + * Emits one event — `workflow_invoked` — each time a workflow starts. No PII, + * no user identity. A random UUID is persisted to `${ARCHON_HOME}/telemetry-id` + * so we can count distinct installs; `$process_person_profile: false` keeps + * events in PostHog's anonymous tier (no person profile ever created). + * + * Opt-out (any one disables telemetry): + * - ARCHON_TELEMETRY_DISABLED=1 + * - DO_NOT_TRACK=1 (de facto standard) + * - POSTHOG_API_KEY set to an empty string (an unset key uses the embedded default) + * + * All functions are fire-and-forget: telemetry errors are logged at debug level + * and swallowed. Capture must never crash Archon. + */ +import { randomUUID } from 'crypto'; +import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs'; +import { join } from 'path'; +import type { PostHog } from 'posthog-node'; +import { getArchonHome } from './archon-paths'; +import { createLogger } from './logger'; + +// Minimal shape of posthog-node's `fetch` option — copied from @posthog/core +// (a transitive dep) to avoid pulling it in as a direct dependency. +interface PostHogFetchOptions { + method: 'GET' | 'POST' | 'PUT' | 'PATCH'; + mode?: 'no-cors'; + credentials?: 'omit'; + headers: Record; + body?: string | Blob; + signal?: AbortSignal; +} +interface PostHogFetchResponse { + status: number; + text: () => Promise; + json: () => Promise; + headers?: { get(name: string): string | null }; +} + +/** + * Embedded write-only PostHog project key. Safe to ship in source: `phc_*` + * keys can only write events, never read data. 
/**
 * Embedded write-only PostHog project key. Safe to ship in source: `phc_*`
 * keys can only write events, never read data. Override with POSTHOG_API_KEY
 * for self-hosted PostHog or a different project.
 */
const EMBEDDED_POSTHOG_API_KEY = 'phc_rR7oacut9mm4upGRbuoMptnyjRium34TTbbqobiQYS7x';
const DEFAULT_POSTHOG_HOST = 'https://us.i.posthog.com';

/** Max length of workflow description sent to PostHog. Guards against unusually long YAML descriptions. */
const DESCRIPTION_MAX_LENGTH = 500;

let cachedLog: ReturnType<typeof createLogger> | undefined;
/** Lazily create the module logger so importing this file has no side effects. */
function getLog(): ReturnType<typeof createLogger> {
  if (!cachedLog) cachedLog = createLogger('telemetry');
  return cachedLog;
}

/**
 * Resolve the PostHog project key.
 *
 * An unset POSTHOG_API_KEY falls back to the embedded key. A key that is set
 * but empty (or only whitespace) yields `''`, which `isTelemetryDisabled`
 * treats as an opt-out.
 */
function getApiKey(): string {
  return (process.env.POSTHOG_API_KEY ?? EMBEDDED_POSTHOG_API_KEY).trim();
}

/**
 * Resolve the PostHog ingestion host. An unset, empty, or whitespace-only
 * POSTHOG_HOST falls back to the default US host.
 */
function getHost(): string {
  const override = process.env.POSTHOG_HOST?.trim();
  return override ? override : DEFAULT_POSTHOG_HOST;
}

/**
 * Interpret an opt-out environment flag.
 *
 * Accepts `1` and `true` (case-insensitive, surrounding whitespace ignored).
 * Anything else, including `0`, an empty string, and unset, leaves the flag off.
 */
function isOptOutFlagSet(value: string | undefined): boolean {
  const normalized = value?.trim().toLowerCase();
  return normalized === '1' || normalized === 'true';
}

/**
 * Check whether telemetry is disabled via env vars or missing key.
 * Exported for tests and callers that want to short-circuit early.
 *
 * @returns `true` when ARCHON_TELEMETRY_DISABLED or DO_NOT_TRACK is set to an
 *   opt-out value, or when the resolved API key is empty.
 */
export function isTelemetryDisabled(): boolean {
  if (isOptOutFlagSet(process.env.ARCHON_TELEMETRY_DISABLED)) return true;
  if (isOptOutFlagSet(process.env.DO_NOT_TRACK)) return true;
  return getApiKey() === '';
}
/**
 * Resolve the anonymous install UUID kept at `${ARCHON_HOME}/telemetry-id`.
 *
 * A non-empty existing file wins and is returned trimmed. Otherwise a new
 * UUID is generated and written back on a best-effort basis. Read and write
 * failures are logged at debug level and never thrown; in that case the new
 * UUID is still returned, so the caller simply gets an id that is not
 * persisted for later runs.
 *
 * Exported so tests can check id resolution without a PostHog client.
 * @internal
 */
export function getOrCreateTelemetryId(): string {
  const idPath = join(getArchonHome(), 'telemetry-id');

  try {
    const persisted = existsSync(idPath) ? readFileSync(idPath, 'utf8').trim() : '';
    if (persisted !== '') return persisted;
  } catch (error) {
    getLog().debug({ err: error as Error, idPath }, 'telemetry.id_read_failed');
  }

  // Nothing usable on disk: mint a new id and try to remember it.
  const freshId = randomUUID();
  try {
    mkdirSync(getArchonHome(), { recursive: true });
    writeFileSync(idPath, freshId, 'utf8');
  } catch (error) {
    getLog().debug({ err: error as Error, idPath }, 'telemetry.id_persist_failed');
  }
  return freshId;
}

/** Per-process memo of the install id so the file is read at most once. */
let telemetryIdCache: string | undefined;
function getTelemetryId(): string {
  telemetryIdCache = telemetryIdCache || getOrCreateTelemetryId();
  return telemetryIdCache;
}

/**
 * Lazily created client promise. `undefined` means initialization has not
 * been started; once started, the promise resolves to the live client or to
 * `null` when telemetry is disabled or initialization failed.
 */
let clientInit: Promise<PostHog | null> | undefined;

/** Start client initialization on first use and hand back the shared promise. */
async function getClient(): Promise<PostHog | null> {
  clientInit = clientInit ?? initClient();
  return clientInit;
}
/**
 * Canned success response handed to the PostHog SDK whenever the real request
 * fails, so the SDK never sees (and never prints) a transport error.
 */
const FAKE_OK_RESPONSE: PostHogFetchResponse = {
  status: 200,
  text: () => Promise.resolve('{"status":"ok"}'),
  json: () => Promise.resolve({ status: 'ok' }),
  headers: { get: () => null },
};

/**
 * `fetch` replacement passed to the PostHog client.
 *
 * Responses with a status in the 200–399 range are returned untouched. Any
 * other status, and any thrown error, is logged at debug level and replaced
 * with `FAKE_OK_RESPONSE`.
 *
 * @param url - Request URL supplied by the SDK.
 * @param options - Request options supplied by the SDK.
 * @returns The real response on success, otherwise the canned OK response.
 */
async function silentFetch(
  url: string,
  options: PostHogFetchOptions
): Promise<PostHogFetchResponse> {
  try {
    const response = await fetch(url, options as RequestInit);
    const accepted = response.status >= 200 && response.status < 400;
    if (accepted) return response;
    getLog().debug({ status: response.status }, 'telemetry.http_non_2xx_suppressed');
  } catch (error) {
    getLog().debug({ err: error as Error }, 'telemetry.fetch_failed_suppressed');
  }
  return FAKE_OK_RESPONSE;
}

/**
 * Build the PostHog client, or return `null` when telemetry is disabled or
 * the SDK cannot be loaded or constructed. Failures are logged at debug level.
 */
async function initClient(): Promise<PostHog | null> {
  if (isTelemetryDisabled()) return null;

  try {
    // Loaded lazily so the SDK is only imported when telemetry is enabled.
    const { PostHog: PostHogClient } = await import('posthog-node');
    const client = new PostHogClient(getApiKey(), {
      host: getHost(),
      flushAt: 20,
      flushInterval: 10000,
      disableGeoip: true,
      fetch: silentFetch,
    });
    // Also listen on the client's own error channel, in case the SDK reports
    // problems there rather than through the fetch path.
    client.on('error', (err: Error) => {
      getLog().debug({ err }, 'telemetry.client_error');
    });
    return client;
  } catch (error) {
    getLog().debug({ err: error as Error }, 'telemetry.init_failed');
    return null;
  }
}

/** Payload accepted by `captureWorkflowInvoked`. */
export interface WorkflowInvokedProperties {
  /** Workflow name as authored in the workflow definition. */
  workflowName: string;
  /** Optional workflow description. */
  workflowDescription?: string;
  /** Optional identifier of the platform that triggered the run. */
  platform?: string;
  /** Optional Archon version string. */
  archonVersion?: string;
}
/**
 * Upper bound, in milliseconds, on how long `shutdownTelemetry` lets the SDK
 * flush. Keeps process exit prompt when PostHog is slow or unreachable.
 */
const SHUTDOWN_TIMEOUT_MS = 3000;

/**
 * Captures whose async body has not finished yet. `shutdownTelemetry` waits
 * for these so an event queued just before exit is handed to the client
 * before the client is closed.
 */
const inFlightCaptures = new Set<Promise<void>>();

/**
 * Fire-and-forget capture of a `workflow_invoked` event. Returns immediately;
 * failures inside the capture are logged at debug level and swallowed.
 *
 * @param props - Workflow name plus optional description, platform and
 *   version. The description is truncated to `DESCRIPTION_MAX_LENGTH`
 *   characters, and empty optional fields are left out of the event.
 */
export function captureWorkflowInvoked(props: WorkflowInvokedProperties): void {
  if (isTelemetryDisabled()) return;

  const task = (async (): Promise<void> => {
    try {
      const client = await getClient();
      if (!client) return;
      const description = props.workflowDescription?.slice(0, DESCRIPTION_MAX_LENGTH);
      client.capture({
        distinctId: getTelemetryId(),
        event: 'workflow_invoked',
        properties: {
          $process_person_profile: false,
          workflow_name: props.workflowName,
          ...(description ? { workflow_description: description } : {}),
          ...(props.platform ? { platform: props.platform } : {}),
          ...(props.archonVersion ? { archon_version: props.archonVersion } : {}),
        },
      });
    } catch (error) {
      getLog().debug({ err: error as Error }, 'telemetry.capture_failed');
    }
  })();

  // Track the task until it settles. Both outcomes just drop the entry, so
  // this bookkeeping can never surface as an unhandled rejection.
  inFlightCaptures.add(task);
  const forget = (): void => {
    inFlightCaptures.delete(task);
  };
  void task.then(forget, forget);
}

/**
 * Flush queued events and close the PostHog client. Call on process exit
 * (server SIGTERM, end of CLI command) so buffered events aren't lost.
 * Safe to call when telemetry was never initialized. Never throws.
 */
export async function shutdownTelemetry(): Promise<void> {
  const pending = clientInit;
  if (pending === undefined) return;

  // Detach the client before awaiting anything, so a capture that arrives
  // during shutdown starts a new client instead of using one being closed,
  // and so this function never clears a client it did not shut down.
  clientInit = undefined;

  try {
    // Let captures that are still waiting on client init enqueue their event.
    await Promise.allSettled([...inFlightCaptures]);
    const client = await pending;
    if (client) {
      await client.shutdown(SHUTDOWN_TIMEOUT_MS);
    }
  } catch (error) {
    getLog().debug({ err: error as Error }, 'telemetry.shutdown_failed');
  }
}
/**
 * Test-only hook: clears the cached install id and the cached client promise
 * so the next call resolves both again. Not part of the public API.
 * @internal
 */
export function resetTelemetryForTests(): void {
  telemetryIdCache = undefined;
  clientInit = undefined;
}
No PII: only workflow name + + // description (authored by the user in their YAML) + platform + version. + // Opt out via ARCHON_TELEMETRY_DISABLED=1 or DO_NOT_TRACK=1. + captureWorkflowInvoked({ + workflowName: workflow.name, + workflowDescription: workflow.description, + platform: platform.getPlatformType(), + archonVersion: BUNDLED_VERSION, + }); deps.store .createWorkflowEvent({ workflow_run_id: workflowRun.id,