Skip to content
This repository was archived by the owner on Jun 19, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions devvit.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,14 @@
"type": "select",
"label": "OpenAI model (developer-set default)",
"options": [
{ "label": "gpt-5.4-nano (recommended — latest, cheapest, json_object support)", "value": "gpt-5.4-nano" },
{ "label": "gpt-5.4-mini (latest, higher quality, ~7× more expensive)", "value": "gpt-5.4-mini" },
{ "label": "gpt-5-mini (previous-gen, fallback)", "value": "gpt-5-mini" },
{ "label": "gpt-5-nano (previous-gen, cheapest fallback)", "value": "gpt-5-nano" }
{
"label": "gpt-5.4-nano (recommended — fast ~1.4s, cheapest, 7/7 on the rule-compile smoke test)",
"value": "gpt-5.4-nano"
},
{
"label": "gpt-5.4-mini (fast ~1.2s, equal quality, a bit pricier — pick this if cost is free for you)",
"value": "gpt-5.4-mini"
}
],
"defaultValue": "gpt-5.4-nano"
}
Expand Down
150 changes: 93 additions & 57 deletions scripts/openai-smoketest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,25 @@
// examples and a handful of representative moderator rules, then validates each
// response against the Rule schema (or detects the clarification escape hatch).
// Verifies the prompt ↔ schema ↔ model triad — the one thing the mocked route
// tests can't: does `gpt-5.4-nano` exist, honour response_format: json_object,
// and emit schema-valid rules from this prompt? Also reports token usage / $.
// tests can't: does the model exist, honour response_format: json_object, and
// emit schema-valid rules from this prompt? Reports per-call latency, token
// usage and a $ estimate, and (with OPENAI_MODELS) compares several models.
//
// OPENAI_API_KEY=sk-... npm run openai:smoketest
// OPENAI_MODEL=gpt-5.4-mini npm run openai:smoketest # override the model
// OPENAI_MODEL=gpt-5.4-mini npm run openai:smoketest # one model
// OPENAI_MODELS=gpt-5.4-nano,gpt-5.4-mini,gpt-4.1-mini npm run openai:smoketest # compare
//
// The key is read from the OPENAI_API_KEY env var, or — if not set — from a
// `.env` file in the repo root (which is git-ignored). Never commit a key, and
// don't paste it into chat: put it in `.env` or `export` it in your shell.
// The key is read from $OPENAI_API_KEY, or — if not set — from a git-ignored
// `.env` at the repo root. Never commit a key; don't paste it into chat.
//
// NOTE: this is intentionally NOT part of `npm run check` or CI — it costs a few
// fractions of a cent per run and needs a real key.
// NOT part of `npm run check` / CI (needs a real key).
//
// IMPORTANT: keep the request payload below in sync with `callOpenAI` in
// src/server/index.ts (same model default, response_format, max_completion_tokens).
// src/server/index.ts (same response_format, max_completion_tokens, no temperature).

import { readFileSync, existsSync } from 'node:fs';
import { join } from 'node:path';
import { performance } from 'node:perf_hooks';
import { VIBE_MOD_SYSTEM_PROMPT, FEW_SHOT_EXAMPLES } from '../src/shared/system-prompt';
import { Rule, checkTreeDepth, type RuleType } from '../src/shared/rule-schema';

Expand All @@ -38,7 +39,10 @@ function loadEnvFile(): void {
loadEnvFile();

const API_KEY = process.env.OPENAI_API_KEY?.trim();
const MODEL = process.env.OPENAI_MODEL?.trim() || 'gpt-5.4-nano';
// Models to exercise, in the order given. Precedence: OPENAI_MODELS (comma-separated list),
// then the single OPENAI_MODEL, then the built-in default. Names are trimmed and blank
// entries (e.g. from a trailing comma) are dropped.
const MODELS: string[] = [];
for (const candidate of (
  process.env.OPENAI_MODELS?.trim() ||
  process.env.OPENAI_MODEL?.trim() ||
  'gpt-5.4-nano'
).split(',')) {
  const name = candidate.trim();
  if (name) MODELS.push(name);
}

if (!API_KEY) {
console.error(
Expand All @@ -56,7 +60,6 @@ const CASES: Case[] = [
},
{ rule: 'Remove posts that contain discord.gg links from accounts with under 50 karma', expect: 'rule' },
{
// explicit numeric threshold → should compile against content.title.upperCaseRatio
rule: 'If a post title is at least 12 characters and more than 70% capital letters, add the flair "Edit your title?"',
expect: 'rule',
},
Expand All @@ -71,12 +74,12 @@ const CASES: Case[] = [
];

type ApiResult =
| { kind: 'rule'; rule: RuleType; tokensIn: number; tokensOut: number }
| { kind: 'clarification'; question: string; tokensIn: number; tokensOut: number }
| { kind: 'invalid'; raw: unknown; reason: string; tokensIn: number; tokensOut: number }
| { kind: 'http_error'; status: number; code?: string; message?: string };
| { kind: 'rule'; rule: RuleType; tokensIn: number; tokensOut: number; ms: number }
| { kind: 'clarification'; question: string; tokensIn: number; tokensOut: number; ms: number }
| { kind: 'invalid'; raw: unknown; reason: string; tokensIn: number; tokensOut: number; ms: number }
| { kind: 'http_error'; status: number; code?: string; message?: string; ms: number };

async function compile(userRule: string): Promise<ApiResult> {
async function compile(model: string, userRule: string): Promise<ApiResult> {
// Mirrors callOpenAI() in src/server/index.ts.
const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [
{ role: 'system', content: VIBE_MOD_SYSTEM_PROMPT },
Expand All @@ -87,16 +90,11 @@ async function compile(userRule: string): Promise<ApiResult> {
}
messages.push({ role: 'user', content: userRule });

const t0 = performance.now();
const resp = await fetch('https://api.openai.com/v1/chat/completions', {
method: 'POST',
headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${API_KEY}` },
body: JSON.stringify({
model: MODEL,
response_format: { type: 'json_object' },
messages,
// Newer OpenAI models (gpt-5.x family) require max_completion_tokens, not max_tokens.
max_completion_tokens: 700,
}),
body: JSON.stringify({ model, response_format: { type: 'json_object' }, messages, max_completion_tokens: 700 }),
});
if (!resp.ok) {
let code: string | undefined;
Expand All @@ -108,13 +106,14 @@ async function compile(userRule: string): Promise<ApiResult> {
} catch {
/* body wasn't JSON */
}
return { kind: 'http_error', status: resp.status, code, message };
return { kind: 'http_error', status: resp.status, code, message, ms: performance.now() - t0 };
}

const data = (await resp.json()) as {
choices: Array<{ message: { content: string } }>;
usage?: { prompt_tokens?: number; completion_tokens?: number };
};
const ms = performance.now() - t0;
const tokensIn = data.usage?.prompt_tokens ?? 0;
const tokensOut = data.usage?.completion_tokens ?? 0;
let parsed: unknown;
Expand All @@ -127,9 +126,9 @@ async function compile(userRule: string): Promise<ApiResult> {
reason: `not JSON: ${e instanceof Error ? e.message : String(e)}`,
tokensIn,
tokensOut,
ms,
};
}

if (
parsed &&
typeof parsed === 'object' &&
Expand All @@ -140,6 +139,7 @@ async function compile(userRule: string): Promise<ApiResult> {
question: String((parsed as { question?: string }).question ?? ''),
tokensIn,
tokensOut,
ms,
};
}
try {
Expand All @@ -152,83 +152,119 @@ async function compile(userRule: string): Promise<ApiResult> {
};
const rule = Rule.parse(augmented);
checkTreeDepth(rule.when as Parameters<typeof checkTreeDepth>[0]);
return { kind: 'rule', rule, tokensIn, tokensOut };
return { kind: 'rule', rule, tokensIn, tokensOut, ms };
} catch (e) {
return { kind: 'invalid', raw: parsed, reason: e instanceof Error ? e.message : String(e), tokensIn, tokensOut };
return {
kind: 'invalid',
raw: parsed,
reason: e instanceof Error ? e.message : String(e),
tokensIn,
tokensOut,
ms,
};
}
}

// gpt-5.4-nano list price (developers.openai.com/api/docs/pricing). These rates are applied
// to every model's summary line, so the $ estimate is only accurate for gpt-5.4-nano —
// adjust the rates (and the "list price" label in the summary) to cost another model.
const PRICE_PER_M = { in: 0.05, out: 0.4 }; // USD per 1M tokens

(async () => {
console.log(`OpenAI smoke test — model: ${MODEL}\n`);
let pass = 0;
let fail = 0;
let totIn = 0;
let totOut = 0;
/** Aggregated outcome of running every smoke-test case against one model. */
type ModelSummary = {
  /** Model name sent in the request body. */
  model: string;

  /** Number of cases whose result kind (rule / clarification) matched the expectation. */
  pass: number;
  /** Number of cases in the suite (CASES.length), whether or not they were all attempted. */
  total: number;

  /** Prompt tokens summed over calls that returned a non-error HTTP response. */
  tokensIn: number;
  /** Completion tokens summed over calls that returned a non-error HTTP response. */
  tokensOut: number;
  /** Latency in milliseconds of each call that returned a non-error HTTP response. */
  ms: number[];

  /** True when a key/model/billing error made the run skip this model's remaining cases. */
  fatal: boolean;
};

async function runModel(model: string): Promise<ModelSummary> {
console.log(`════════ ${model} ════════\n`);
const s: ModelSummary = { model, pass: 0, total: CASES.length, tokensIn: 0, tokensOut: 0, ms: [], fatal: false };
for (const c of CASES) {
process.stdout.write(`• "${c.rule.slice(0, 70)}${c.rule.length > 70 ? '…' : ''}"\n`);
process.stdout.write(`• "${c.rule.slice(0, 64)}${c.rule.length > 64 ? '…' : ''}"\n`);
let r: ApiResult;
try {
r = await compile(c.rule);
r = await compile(model, c.rule);
} catch (e) {
console.log(` ✗ request threw: ${e instanceof Error ? e.message : String(e)}\n`);
fail++;
continue;
}
if (r.kind === 'http_error') {
const billingCodes = ['insufficient_quota', 'billing_not_active', 'account_deactivated'];
const fatal = r.status === 401 || r.code === 'model_not_found' || billingCodes.includes(r.code ?? '');
const fatal =
r.status === 401 || r.status === 403 || r.code === 'model_not_found' || billingCodes.includes(r.code ?? '');
const hint =
r.status === 404 || r.code === 'model_not_found'
? ` → model "${MODEL}" not available to this key (try OPENAI_MODEL=gpt-5-mini, or check the exact model name)`
r.status === 404 || r.status === 403 || r.code === 'model_not_found'
? ` → model "${model}" not available to this key`
: r.status === 401
? ' → invalid API key'
: billingCodes.includes(r.code ?? '')
? ' → OpenAI account billing is not active — add a payment method at platform.openai.com/account/billing (the key is fine; the account just is not enabled for API use yet)'
? ' → OpenAI account billing is not active — add a payment method at platform.openai.com/account/billing'
: r.status === 429
? ' → rate-limited — wait and re-run, or raise your usage limits'
? ' → rate-limited — wait and re-run'
: '';
console.log(
` ✗ HTTP ${r.status}${r.code ? ` (${r.code})` : ''}${r.message ? ` — ${r.message}` : ''}${hint}\n`,
);
fail++;
// No point hammering the API if it's a key/model/billing problem rather than a transient blip.
if (fatal) {
console.log('Aborting remaining cases — fix the above and re-run.\n');
s.fatal = true;
console.log('Skipping remaining cases for this model.\n');
break;
}
continue;
}
totIn += r.tokensIn;
totOut += r.tokensOut;
s.tokensIn += r.tokensIn;
s.tokensOut += r.tokensOut;
s.ms.push(r.ms);
const ok =
(r.kind === 'rule' && c.expect === 'rule') || (r.kind === 'clarification' && c.expect === 'clarification');
if (ok) pass++;
else fail++;
if (ok) s.pass++;
const mark = ok ? '✓' : '✗';
const t = `${Math.round(r.ms)}ms`;
if (r.kind === 'rule')
console.log(
` ${mark} rule id=${r.rule.id} on=[${r.rule.on.join(',')}] then=[${r.rule.then.map((a) => a.action).join(',')}] (in ${r.tokensIn} / out ${r.tokensOut})${c.expect !== 'rule' ? ' ← expected clarification' : ''}`,
` ${mark} ${t} rule id=${r.rule.id} on=[${r.rule.on.join(',')}] then=[${r.rule.then.map((a) => a.action).join(',')}] (in ${r.tokensIn}/out ${r.tokensOut})${c.expect !== 'rule' ? ' ← expected clarification' : ''}`,
);
else if (r.kind === 'clarification')
console.log(
` ${mark} clarification: "${r.question.slice(0, 90)}" (in ${r.tokensIn} / out ${r.tokensOut})${c.expect !== 'clarification' ? ' ← expected a rule' : ''}`,
` ${mark} ${t} clarification: "${r.question.slice(0, 80)}" (in ${r.tokensIn}/out ${r.tokensOut})${c.expect !== 'clarification' ? ' ← expected a rule' : ''}`,
);
else
console.log(
` ✗ invalid output — ${r.reason}\n raw: ${JSON.stringify(r.raw).slice(0, 300)} (in ${r.tokensIn} / out ${r.tokensOut})`,
` ✗ ${t} invalid — ${r.reason} (in ${r.tokensIn}/out ${r.tokensOut})\n raw: ${JSON.stringify(r.raw).slice(0, 240)}`,
);
console.log('');
}

const cost = (totIn / 1_000_000) * PRICE_PER_M.in + (totOut / 1_000_000) * PRICE_PER_M.out;
console.log('────────────────────────────────────────────────────────');
const cost = (s.tokensIn / 1_000_000) * PRICE_PER_M.in + (s.tokensOut / 1_000_000) * PRICE_PER_M.out;
const med = s.ms.length ? [...s.ms].sort((a, b) => a - b)[Math.floor(s.ms.length / 2)] : 0;
console.log(
`${pass}/${CASES.length} cases as expected. tokens: ${totIn} in / ${totOut} out ≈ $${cost.toFixed(5)} (at ${MODEL} list price)`,
` → ${s.pass}/${s.total} as expected · latency median ${Math.round(med)}ms (min ${Math.round(Math.min(...s.ms, 0))} / max ${Math.round(Math.max(...s.ms, 0))}) · tokens ${s.tokensIn} in / ${s.tokensOut} out · ≈ $${cost.toFixed(5)} (at gpt-5.4-nano list price)\n`,

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The minimum-latency calculation is incorrect. Math.min(...s.ms, 0) always returns 0: every measured latency is positive, so the extra 0 argument is always the smallest value and the report shows min 0ms regardless of actual performance. The 0 was presumably added so that an empty s.ms does not produce Infinity (Math.min() with no arguments returns Infinity, and Math.max() returns -Infinity). Guard on s.ms.length instead, and drop the 0 from the Math.min arguments so the real minimum measured latency is reported.

Suggested change
` → ${s.pass}/${s.total} as expected · latency median ${Math.round(med)}ms (min ${Math.round(Math.min(...s.ms, 0))} / max ${Math.round(Math.max(...s.ms, 0))}) · tokens ${s.tokensIn} in / ${s.tokensOut} out · ≈ $${cost.toFixed(5)} (at gpt-5.4-nano list price)\n`,
` → ${s.pass}/${s.total} as expected · latency median ${Math.round(med)}ms (min ${s.ms.length ? Math.round(Math.min(...s.ms)) : 0} / max ${s.ms.length ? Math.round(Math.max(...s.ms)) : 0}) · tokens ${s.tokensIn} in / ${s.tokensOut} out · ≈ $${cost.toFixed(5)} (at gpt-5.4-nano list price)\n`,

);
console.log('────────────────────────────────────────────────────────');
process.exit(fail > 0 ? 1 : 0);
return s;
}

// Entry point: runs the case suite against each requested model one after another, prints a
// comparison table when more than one model was requested, then exits 0 only if every model
// passed every case and none was aborted by a fatal (key/model/billing) error.
(async () => {
  // OPENAI_MODELS="," is a non-empty string, so it bypasses the default model, yet it
  // filters down to an empty list. Without this guard the script would call nothing and
  // still exit 0 — a false green for a smoke test.
  if (MODELS.length === 0) {
    console.error('No models to test — OPENAI_MODELS / OPENAI_MODEL contained no model names.');
    process.exit(1);
  }

  /** Upper-middle element of the sorted latencies; 0 when nothing was measured. */
  const median = (values: number[]): number =>
    values.length ? ([...values].sort((a, b) => a - b)[Math.floor(values.length / 2)] ?? 0) : 0;

  const results: ModelSummary[] = [];
  for (const m of MODELS) results.push(await runModel(m));

  if (results.length > 1) {
    console.log('════════ comparison ════════');
    console.log('model'.padEnd(22) + 'pass'.padEnd(8) + 'median'.padEnd(10) + 'max'.padEnd(10) + 'avg out tok');
    for (const r of results) {
      const med = median(r.ms);
      // r.ms has one entry per call that contributed tokens, so it is the right divisor.
      const avgOut = r.ms.length ? Math.round(r.tokensOut / r.ms.length) : 0;
      console.log(
        r.model.padEnd(22) +
          `${r.pass}/${r.total}`.padEnd(8) +
          `${Math.round(med)}ms`.padEnd(10) +
          // Length guard: Math.max() with no arguments is -Infinity.
          `${r.ms.length ? Math.round(Math.max(...r.ms)) : 0}ms`.padEnd(10) +
          `${avgOut}${r.fatal ? ' (unavailable)' : ''}`,
      );
    }
    console.log('');
  }

  const anyFail = results.some((r) => r.pass < r.total || r.fatal);
  process.exit(anyFail ? 1 : 0);
})();