diff --git a/devvit.json b/devvit.json index 14d24a8..ecb5383 100644 --- a/devvit.json +++ b/devvit.json @@ -18,18 +18,22 @@ }, "openaiModel": { "type": "select", - "label": "OpenAI model (developer-set default)", + "label": "OpenAI model for rule compilation (reasoning_effort: none, verbosity: low)", "options": [ { - "label": "gpt-5.4-nano (recommended — fast ~1.4s, cheapest, 7/7 on the rule-compile smoke test)", + "label": "gpt-5.4-mini (recommended — fastest ~1.1s, 7/7 on the rule-compile smoke test)", + "value": "gpt-5.4-mini" + }, + { + "label": "gpt-5.4-nano (cheapest, ~1.4s, also 7/7)", "value": "gpt-5.4-nano" }, { - "label": "gpt-5.4-mini (fast ~1.2s, equal quality, a bit pricier — pick this if cost is free for you)", - "value": "gpt-5.4-mini" + "label": "gpt-5.4 (full — slower ~1.8s, more cautious about ambiguous rules; no quality gain for this task)", + "value": "gpt-5.4" } ], - "defaultValue": "gpt-5.4-nano" + "defaultValue": "gpt-5.4-mini" } }, "subreddit": { diff --git a/scripts/openai-smoketest.ts b/scripts/openai-smoketest.ts index cb35687..594f4eb 100644 --- a/scripts/openai-smoketest.ts +++ b/scripts/openai-smoketest.ts @@ -17,8 +17,10 @@ // // NOT part of `npm run check` / CI (needs a real key). // -// IMPORTANT: keep the request payload below in sync with `callOpenAI` in -// src/server/index.ts (same response_format, max_completion_tokens, no temperature). +// IMPORTANT: the default request config below mirrors `callOpenAI` in +// src/server/index.ts (response_format: json_object, reasoning_effort: 'none', +// verbosity: 'low', max_completion_tokens: 600, no temperature). Env vars +// REASONING_EFFORT / VERBOSITY / MAX_COMPLETION_TOKENS override for experiments. 
import { readFileSync, existsSync } from 'node:fs'; import { join } from 'node:path'; @@ -43,6 +45,14 @@ const MODELS = (process.env.OPENAI_MODELS?.trim() || process.env.OPENAI_MODEL?.t .split(',') .map((s) => s.trim()) .filter(Boolean); +// Request tuning — DEFAULTS MATCH callOpenAI() in src/server/index.ts. Override +// via env to experiment. `none` reasoning (gpt-5.4 family value; older models +// use `minimal`) suits this mechanical NL→JSON task: fast, no token-budget burn. +// Set REASONING_EFFORT='' (or VERBOSITY='') to omit that param entirely. +const REASONING_EFFORT = + process.env.REASONING_EFFORT === '' ? undefined : process.env.REASONING_EFFORT?.trim() || 'none'; // none | low | medium | high | xhigh +const VERBOSITY = process.env.VERBOSITY === '' ? undefined : process.env.VERBOSITY?.trim() || 'low'; // low | medium | high +const MAX_COMPLETION_TOKENS = Number(process.env.MAX_COMPLETION_TOKENS) || 600; if (!API_KEY) { console.error( @@ -63,7 +73,7 @@ const CASES: Case[] = [ rule: 'If a post title is at least 12 characters and more than 70% capital letters, add the flair "Edit your title?"', expect: 'rule', }, - { rule: 'Report comments that are over 60 characters and almost entirely uppercase', expect: 'rule' }, + { rule: 'Report comments over 60 characters where more than 90% of the letters are uppercase', expect: 'rule' }, { rule: 'Send to the mod queue any post linking to a known URL shortener (bit.ly, tinyurl.com, t.co)', expect: 'rule', @@ -90,11 +100,20 @@ async function compile(model: string, userRule: string): Promise { } messages.push({ role: 'user', content: userRule }); + const body: Record = { + model, + response_format: { type: 'json_object' }, + messages, + max_completion_tokens: MAX_COMPLETION_TOKENS, + }; + if (REASONING_EFFORT) body.reasoning_effort = REASONING_EFFORT; + if (VERBOSITY) body.verbosity = VERBOSITY; + const t0 = performance.now(); const resp = await fetch('https://api.openai.com/v1/chat/completions', { method: 'POST', headers: {
'Content-Type': 'application/json', Authorization: `Bearer ${API_KEY}` }, - body: JSON.stringify({ model, response_format: { type: 'json_object' }, messages, max_completion_tokens: 700 }), + body: JSON.stringify(body), }); if (!resp.ok) { let code: string | undefined; diff --git a/src/server/index.ts b/src/server/index.ts index 04912a1..243e748 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -294,7 +294,7 @@ app.post('/internal/form/compose-rule-submit', async (c) => { schemaVersion: '1.0.0', bundleVersion: 0, compiledAt: Date.now(), - llmModel: ((await settings.get('openaiModel')) as string) || 'gpt-5.4-nano', + llmModel: ((await settings.get('openaiModel')) as string) || 'gpt-5.4-mini', llmTokensIn: 0, llmTokensOut: 0, rules: [], @@ -700,7 +700,7 @@ async function callOpenAI( const apiKey = (subKey?.trim() || globalKey || '').trim(); if (!apiKey) throw new Error('no_key'); - const model = ((await settings.get('openaiModel')) as string) || 'gpt-5.4-nano'; + const model = ((await settings.get('openaiModel')) as string) || 'gpt-5.4-mini'; const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [ { role: 'system', content: VIBE_MOD_SYSTEM_PROMPT }, @@ -720,13 +720,20 @@ async function callOpenAI( method: 'POST', headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}` }, body: JSON.stringify({ - model, + model, // gpt-5.4-mini (default) / gpt-5.4-nano / gpt-5.4 — see devvit.json openaiModel response_format: { type: 'json_object' }, messages, - // Newer OpenAI models (gpt-5.x family) require max_completion_tokens (not max_tokens) - // and only accept the default temperature, so we don't send `temperature`. Determinism - // is carried by response_format: json_object + the strict prompt + few-shot examples. - max_completion_tokens: 700, + // Tuned for what this call is: a mechanical NL → strict-JSON translation. 
+ // reasoning_effort: 'none' — no hidden reasoning needed; keeps it fast and stops the + // token budget being eaten by reasoning (gpt-5.4 family value; + // older models call this 'minimal'). Measured ~1.1–1.4s. + // verbosity: 'low' — terse JSON, no commentary. + // max_completion_tokens — a compiled rule + a clarification fit well under 600. + // (no `temperature` — the gpt-5.x family only accepts the default; max_tokens isn't + // supported on these models, use max_completion_tokens.) + reasoning_effort: 'none', + verbosity: 'low', + max_completion_tokens: 600, }), });