Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 168 additions & 0 deletions assistant/src/__tests__/skill-feature-flags-integration.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
/**
 * Integration tests for skill feature flag enforcement at the system prompt
 * layer. The skill_load and session-skill-tools projection layers are not
 * exercised by the tests in this file.
 */
import { existsSync, mkdirSync, rmSync, writeFileSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';

import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test';

// ---------------------------------------------------------------------------
// Test-scoped temp directory and config state
// ---------------------------------------------------------------------------

// A random suffix keeps each run's scratch directory distinct from any other.
const testRunId = crypto.randomUUID();
const TEST_DIR = join(tmpdir(), 'vellum-skill-flags-test-' + testRunId);

// Mutable config object; tests reassign it to control the active feature
// flags, and the setup hook resets it to these defaults.
let currentConfig: Record<string, unknown> = {
  sandbox: { enabled: false, backend: 'native' },
  featureFlags: {},
};

// Replace the platform helpers so every path they hand out lives under the
// test-scoped temp directory, and so platform probes / migrations do nothing.
mock.module('../util/platform.js', () => {
  /** Joins the given segments onto the test-scoped temp directory. */
  const underTestDir = (...segments: string[]): string => join(TEST_DIR, ...segments);
  const doNothing = (): void => {};
  const alwaysFalse = (): boolean => false;

  return {
    // Directory roots all collapse onto TEST_DIR itself.
    getRootDir: () => TEST_DIR,
    getDataDir: () => TEST_DIR,
    getWorkspaceDir: () => TEST_DIR,
    getWorkspaceConfigPath: () => underTestDir('config.json'),
    getWorkspaceSkillsDir: () => underTestDir('skills'),
    getWorkspaceHooksDir: () => underTestDir('hooks'),
    getWorkspacePromptPath: (file: string) => underTestDir(file),
    ensureDataDir: doNothing,
    getSocketPath: () => underTestDir('vellum.sock'),
    getPidPath: () => underTestDir('vellum.pid'),
    getDbPath: () => underTestDir('data', 'assistant.db'),
    getLogPath: () => underTestDir('logs', 'vellum.log'),
    getHistoryPath: () => underTestDir('history'),
    getHooksDir: () => underTestDir('hooks'),
    getIpcBlobDir: () => underTestDir('ipc-blobs'),
    getSandboxRootDir: () => underTestDir('sandbox'),
    getSandboxWorkingDir: () => TEST_DIR,
    getInterfacesDir: () => underTestDir('interfaces'),
    // Every OS probe answers false, while the platform name is fixed to 'linux'.
    isMacOS: alwaysFalse,
    isLinux: alwaysFalse,
    isWindows: alwaysFalse,
    getPlatformName: () => 'linux',
    getClipboardCommand: () => null,
    removeSocketFile: doNothing,
    migratePath: doNothing,
    migrateToWorkspaceLayout: doNothing,
    migrateToDataLayout: doNothing,
  };
});

// Silence logging: any property read on the logger yields a function that
// does nothing, so every log call is accepted and discarded.
mock.module('../util/logger.js', () => {
  const createSilentLogger = () =>
    new Proxy({} as Record<string, unknown>, {
      get() {
        return () => {};
      },
    });

  return {
    getLogger: () => createSilentLogger(),
    isDebug: () => false,
    // Pass values through untouched.
    truncateForLog: (v: string) => v,
  };
});

mock.module('../config/loader.js', () => {
  return {
    // Read the variable on every call so a test's reassignment is picked up.
    getConfig: () => currentConfig,
  };
});

mock.module('../config/user-reference.js', () => {
  return {
    resolveUserReference: () => 'TestUser',
  };
});

mock.module('../security/parental-control-store.js', () => {
  return {
    // Parental controls are reported as disabled with nothing restricted.
    getParentalControlSettings: () => {
      return { enabled: false, contentRestrictions: [], blockedToolCategories: [] };
    },
  };
});

mock.module('../tools/credentials/metadata-store.js', () => {
  return {
    // No stored credential metadata.
    listCredentialMetadata: () => [],
  };
});

// Loaded with a dynamic import placed after the mock.module() registrations.
const systemPromptModule = await import('../config/system-prompt.js');
const { buildSystemPrompt } = systemPromptModule;

// ---------------------------------------------------------------------------
// Setup / Teardown
// ---------------------------------------------------------------------------

beforeEach(() => {
  // Every test starts from the default config: sandbox off, no feature flags.
  currentConfig = {
    sandbox: { enabled: false, backend: 'native' },
    featureFlags: {},
  };
  // Make sure the scratch directory exists before a test writes fixtures.
  mkdirSync(TEST_DIR, { recursive: true });
});

afterEach(() => {
  // Remove the scratch directory and everything written into it. With
  // `force: true`, rmSync ignores a path that does not exist, so no separate
  // existence check is needed.
  rmSync(TEST_DIR, { recursive: true, force: true });
});

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Writes a minimal skill fixture to `<TEST_DIR>/skills/<id>/SKILL.md` and
 * appends the skill id as a list entry to `<TEST_DIR>/skills/SKILLS.md`.
 *
 * @param id - Directory name for the skill; also the entry added to the index.
 * @param name - Value written to the `name` front-matter field.
 * @param description - Value written to the `description` front-matter field.
 */
function createSkillOnDisk(id: string, name: string, description: string): void {
  const skillsDir = join(TEST_DIR, 'skills');
  const skillDir = join(skillsDir, id);
  mkdirSync(skillDir, { recursive: true });
  writeFileSync(
    join(skillDir, 'SKILL.md'),
    `---\nname: "${name}"\ndescription: "${description}"\n---\n\nInstructions for ${id}.\n`,
  );
  // Ensure the SKILLS.md index references the skill. Opening with flag 'a'
  // creates the file when it is missing and appends otherwise, so there is no
  // need to read the existing contents back (or to `require` fs a second time).
  writeFileSync(join(skillsDir, 'SKILLS.md'), `- ${id}\n`, { flag: 'a' });
}

// ---------------------------------------------------------------------------
// System prompt — feature flag filtering
// ---------------------------------------------------------------------------

describe('buildSystemPrompt feature flag filtering', () => {
  /** Writes the two skill fixtures ("browser" and "twitter") that every case uses. */
  const seedFixtureSkills = (): void => {
    createSkillOnDisk('browser', 'Browser', 'Web browsing automation');
    createSkillOnDisk('twitter', 'Twitter', 'Post to X/Twitter');
  };

  /** Installs a config carrying the given feature flags, then builds the prompt. */
  const buildPromptWithFlags = (featureFlags: Record<string, boolean>) => {
    currentConfig = {
      sandbox: { enabled: false, backend: 'native' },
      featureFlags,
    };
    return buildSystemPrompt();
  };

  test('flag OFF skill does not appear in <available_skills> section', () => {
    seedFixtureSkills();

    const prompt = buildPromptWithFlags({ 'skills.browser.enabled': false });

    // Only the skill whose flag is off should be missing.
    expect(prompt).toContain('id="twitter"');
    expect(prompt).not.toContain('id="browser"');
  });

  test('all skills visible when featureFlags is empty', () => {
    seedFixtureSkills();

    const prompt = buildPromptWithFlags({});

    expect(prompt).toContain('id="browser"');
    expect(prompt).toContain('id="twitter"');
  });

  test('all skills hidden when all flags are OFF', () => {
    seedFixtureSkills();

    const prompt = buildPromptWithFlags({
      'skills.browser.enabled': false,
      'skills.twitter.enabled': false,
    });

    // With nothing left to list, the section itself must be absent too.
    expect(prompt).not.toContain('<available_skills>');
    expect(prompt).not.toContain('id="browser"');
    expect(prompt).not.toContain('id="twitter"');
  });
});
154 changes: 154 additions & 0 deletions assistant/src/__tests__/skill-feature-flags.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import { describe, expect, test } from 'bun:test';

import type { AssistantConfig } from '../config/schema.js';
import { isSkillFeatureEnabled, resolveSkillStates } from '../config/skill-state.js';
import type { SkillSummary } from '../config/skills.js';

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Builds a config fixture with empty `featureFlags` and a default `skills`
 * section. Any top-level key present in `overrides` replaces the default
 * wholesale (shallow merge).
 */
function makeConfig(overrides: Partial<AssistantConfig> = {}): AssistantConfig {
  const defaults = {
    featureFlags: {},
    skills: {
      entries: {},
      load: { extraDirs: [], watch: true, watchDebounceMs: 250 },
      install: { nodeManager: 'npm' },
      allowBundled: null,
    },
  };
  const merged = { ...defaults, ...overrides };
  return merged as AssistantConfig;
}

/**
 * Builds a skill summary fixture for `id`. Paths point under `/fake/skills/<id>`
 * and `bundled` is true only when `source` is `'bundled'` (the default).
 */
function makeSkill(id: string, source: 'bundled' | 'managed' = 'bundled'): SkillSummary {
  const directoryPath = `/fake/skills/${id}`;
  const isBundled = source === 'bundled';

  return {
    id,
    name: `${id} skill`,
    description: `Description for ${id}`,
    directoryPath,
    skillFilePath: `${directoryPath}/SKILL.md`,
    bundled: isBundled,
    userInvocable: true,
    disableModelInvocation: false,
    source,
  };
}

// ---------------------------------------------------------------------------
// isSkillFeatureEnabled
// ---------------------------------------------------------------------------

describe('isSkillFeatureEnabled', () => {
  test('returns true when featureFlags section is empty', () => {
    const noFlags = makeConfig({ featureFlags: {} });
    const enabled = isSkillFeatureEnabled('browser', noFlags);
    expect(enabled).toBe(true);
  });

  test('returns true when skill key is missing (default enabled)', () => {
    // Only an unrelated skill has a flag entry.
    const unrelatedFlagOnly = makeConfig({
      featureFlags: { 'skills.other.enabled': true },
    });
    const enabled = isSkillFeatureEnabled('browser', unrelatedFlagOnly);
    expect(enabled).toBe(true);
  });

  test('returns true when skill key is explicitly true', () => {
    const flagOn = makeConfig({
      featureFlags: { 'skills.browser.enabled': true },
    });
    const enabled = isSkillFeatureEnabled('browser', flagOn);
    expect(enabled).toBe(true);
  });

  test('returns false when skill key is explicitly false', () => {
    const flagOff = makeConfig({
      featureFlags: { 'skills.browser.enabled': false },
    });
    const enabled = isSkillFeatureEnabled('browser', flagOff);
    expect(enabled).toBe(false);
  });

  test('returns true when featureFlags is undefined', () => {
    const withoutFlagsKey = makeConfig();
    // Strip the key entirely to mimic a config object that lacks featureFlags.
    delete (withoutFlagsKey as Record<string, unknown>).featureFlags;
    const enabled = isSkillFeatureEnabled('browser', withoutFlagsKey);
    expect(enabled).toBe(true);
  });
});

// ---------------------------------------------------------------------------
// resolveSkillStates — feature flag filtering
// ---------------------------------------------------------------------------

describe('resolveSkillStates with feature flags', () => {
  /** Resolves `catalog` against `config` and returns the resulting skill ids in order. */
  const resolveIds = (catalog: SkillSummary[], config: AssistantConfig) =>
    resolveSkillStates(catalog, config).map((state) => state.summary.id);

  test('flag OFF skill does not appear in resolved list', () => {
    const skills = [makeSkill('browser'), makeSkill('twitter')];
    const browserOff = makeConfig({
      featureFlags: { 'skills.browser.enabled': false },
    });

    const visibleIds = resolveIds(skills, browserOff);

    expect(visibleIds).not.toContain('browser');
    expect(visibleIds).toContain('twitter');
  });

  test('flag ON skill appears normally', () => {
    const skills = [makeSkill('browser'), makeSkill('twitter')];
    const bothOn = makeConfig({
      featureFlags: { 'skills.browser.enabled': true, 'skills.twitter.enabled': true },
    });

    const visibleIds = resolveIds(skills, bothOn);

    expect(visibleIds).toContain('browser');
    expect(visibleIds).toContain('twitter');
  });

  test('missing flag key defaults to enabled', () => {
    const skills = [makeSkill('browser')];
    const noFlags = makeConfig({ featureFlags: {} });

    const states = resolveSkillStates(skills, noFlags);

    expect(states.length).toBe(1);
    expect(states[0].summary.id).toBe('browser');
  });

  test('feature flag OFF takes precedence over user-enabled config entry', () => {
    const skills = [makeSkill('browser')];
    // The user entry enables the skill while the feature flag turns it off.
    const conflicting = makeConfig({
      featureFlags: { 'skills.browser.enabled': false },
      skills: {
        entries: { browser: { enabled: true } },
        load: { extraDirs: [], watch: true, watchDebounceMs: 250 },
        install: { nodeManager: 'npm' },
        allowBundled: null,
      },
    });

    const states = resolveSkillStates(skills, conflicting);

    // The feature flag wins, so the skill is dropped from the result entirely.
    expect(states.length).toBe(0);
  });

  test('multiple skills with mixed flags', () => {
    const skills = ['browser', 'twitter', 'deploy'].map((id) => makeSkill(id));
    const twoOff = makeConfig({
      featureFlags: {
        'skills.browser.enabled': false,
        'skills.deploy.enabled': false,
      },
    });

    const visibleIds = resolveIds(skills, twoOff);

    expect(visibleIds).toEqual(['twitter']);
  });
});
Loading