From 301bae3031cc02cf74d14dde8cd6b3a33d8b84e0 Mon Sep 17 00:00:00 2001 From: ComBba Date: Tue, 12 May 2026 19:54:44 +0900 Subject: [PATCH] =?UTF-8?q?feat(openai):=20tune=20the=20compile=20call=20f?= =?UTF-8?q?or=20the=20task=20=E2=80=94=20reasoning=5Feffort:=20none,=20ver?= =?UTF-8?q?bosity:=20low;=20default=20gpt-5.4-mini?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The single LLM call vibe-mod makes (callOpenAI: NL rule → strict JSON, or a clarification) is mechanical translation, not reasoning. Configure it as such: - reasoning_effort: 'none' — no hidden reasoning; fast (~1.2–1.4s) and keeps the token budget from being eaten by reasoning. NB: 'none' is the gpt-5.4 family's value; the gpt-5.0/5.1-era 'minimal' is rejected by 5.4 ("Supported values are: 'none','low','medium','high','xhigh'"), and gpt-5-mini wants 'minimal' — so this is 5.4-family-specific, which is fine since the model options are restricted to that family. - verbosity: 'low' — terse JSON, no commentary. - max_completion_tokens: 600 (down from 700) — a compiled rule + a clarification fit comfortably; observed worst case ~150 out tokens. - still no `temperature` (gpt-5.x only accepts the default). devvit.json openaiModel options trimmed/relabelled to the three viable picks with measured numbers, default switched gpt-5.4-nano → gpt-5.4-mini: gpt-5.4-mini 7/7 median ~1.2s max ~1.8s ← recommended, fastest gpt-5.4-nano 7/7 median ~1.5s max ~1.7s ← cheapest gpt-5.4 7/7 median ~2.1s max ~4.2s ← full; slower, more cautious on ambiguous rules, no quality gain (index.ts fallback model also updated nano → mini.) smoketest: default request config now mirrors callOpenAI (reasoning_effort/verbosity/max_completion_tokens), with REASONING_EFFORT/VERBOSITY/MAX_COMPLETION_TOKENS env overrides for experiments. (Per-call latency and the OPENAI_MODELS=a,b,c comparison table were added in earlier commits.) 
Tightened one test case to an explicit threshold ("more than 90% of the letters are uppercase") so it doesn't penalise the more-cautious model for asking — now 7/7 on all three. tsc/lint/format/tests(152)/acceptance(4/4) all green; smoke test 7/7 × 3 models. Co-Authored-By: Claude Opus 4.7 (1M context) --- devvit.json | 14 +++++++++----- scripts/openai-smoketest.ts | 27 +++++++++++++++++++++++---- src/server/index.ts | 21 ++++++++++++++------- 3 files changed, 46 insertions(+), 16 deletions(-) diff --git a/devvit.json b/devvit.json index 14d24a8..ecb5383 100644 --- a/devvit.json +++ b/devvit.json @@ -18,18 +18,22 @@ }, "openaiModel": { "type": "select", - "label": "OpenAI model (developer-set default)", + "label": "OpenAI model for rule compilation (reasoning_effort: none, verbosity: low)", "options": [ { - "label": "gpt-5.4-nano (recommended — fast ~1.4s, cheapest, 7/7 on the rule-compile smoke test)", + "label": "gpt-5.4-mini (recommended — fastest ~1.1s, 7/7 on the rule-compile smoke test)", + "value": "gpt-5.4-mini" + }, + { + "label": "gpt-5.4-nano (cheapest, ~1.4s, also 7/7)", "value": "gpt-5.4-nano" }, { - "label": "gpt-5.4-mini (fast ~1.2s, equal quality, a bit pricier — pick this if cost is free for you)", - "value": "gpt-5.4-mini" + "label": "gpt-5.4 (full — slower ~1.8s, more cautious about ambiguous rules; no quality gain for this task)", + "value": "gpt-5.4" } ], - "defaultValue": "gpt-5.4-nano" + "defaultValue": "gpt-5.4-mini" } }, "subreddit": { diff --git a/scripts/openai-smoketest.ts b/scripts/openai-smoketest.ts index cb35687..594f4eb 100644 --- a/scripts/openai-smoketest.ts +++ b/scripts/openai-smoketest.ts @@ -17,8 +17,10 @@ // // NOT part of `npm run check` / CI (needs a real key). // -// IMPORTANT: keep the request payload below in sync with `callOpenAI` in -// src/server/index.ts (same response_format, max_completion_tokens, no temperature). 
+// IMPORTANT: the default request config below mirrors `callOpenAI` in +// src/server/index.ts (response_format: json_object, reasoning_effort: 'none', +// verbosity: 'low', max_completion_tokens: 600, no temperature). Env vars +// REASONING_EFFORT / VERBOSITY / MAX_COMPLETION_TOKENS override for experiments. import { readFileSync, existsSync } from 'node:fs'; import { join } from 'node:path'; @@ -43,6 +45,14 @@ const MODELS = (process.env.OPENAI_MODELS?.trim() || process.env.OPENAI_MODEL?.t .split(',') .map((s) => s.trim()) .filter(Boolean); +// Request tuning — DEFAULTS MATCH callOpenAI() in src/server/index.ts. Override +// via env to experiment. `none` reasoning (gpt-5.4 family value; older models +// use `minimal`) suits this mechanical NL→JSON task: fast, no token-budget burn. +// Set REASONING_EFFORT='' to omit the param entirely. +const REASONING_EFFORT = + process.env.REASONING_EFFORT === '' ? undefined : process.env.REASONING_EFFORT?.trim() || 'none'; // none | low | medium | high | xhigh +const VERBOSITY = process.env.VERBOSITY === '' ? 
undefined : process.env.VERBOSITY?.trim() || 'low'; // low | medium | high +const MAX_COMPLETION_TOKENS = Number(process.env.MAX_COMPLETION_TOKENS) || 600; if (!API_KEY) { console.error( @@ -63,7 +73,7 @@ const CASES: Case[] = [ rule: 'If a post title is at least 12 characters and more than 70% capital letters, add the flair "Edit your title?"', expect: 'rule', }, - { rule: 'Report comments that are over 60 characters and almost entirely uppercase', expect: 'rule' }, + { rule: 'Report comments over 60 characters where more than 90% of the letters are uppercase', expect: 'rule' }, { rule: 'Send to the mod queue any post linking to a known URL shortener (bit.ly, tinyurl.com, t.co)', expect: 'rule', @@ -90,11 +100,20 @@ async function compile(model: string, userRule: string): Promise { } messages.push({ role: 'user', content: userRule }); + const body: Record = { + model, + response_format: { type: 'json_object' }, + messages, + max_completion_tokens: MAX_COMPLETION_TOKENS, + }; + if (REASONING_EFFORT) body.reasoning_effort = REASONING_EFFORT; + if (VERBOSITY) body.verbosity = VERBOSITY; + const t0 = performance.now(); const resp = await fetch('https://api.openai.com/v1/chat/completions', { method: 'POST', headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${API_KEY}` }, - body: JSON.stringify({ model, response_format: { type: 'json_object' }, messages, max_completion_tokens: 700 }), + body: JSON.stringify(body), }); if (!resp.ok) { let code: string | undefined; diff --git a/src/server/index.ts b/src/server/index.ts index 04912a1..243e748 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -294,7 +294,7 @@ app.post('/internal/form/compose-rule-submit', async (c) => { schemaVersion: '1.0.0', bundleVersion: 0, compiledAt: Date.now(), - llmModel: ((await settings.get('openaiModel')) as string) || 'gpt-5.4-nano', + llmModel: ((await settings.get('openaiModel')) as string) || 'gpt-5.4-mini', llmTokensIn: 0, llmTokensOut: 0, rules: [], @@ -700,7 
+700,7 @@ async function callOpenAI( const apiKey = (subKey?.trim() || globalKey || '').trim(); if (!apiKey) throw new Error('no_key'); - const model = ((await settings.get('openaiModel')) as string) || 'gpt-5.4-nano'; + const model = ((await settings.get('openaiModel')) as string) || 'gpt-5.4-mini'; const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [ { role: 'system', content: VIBE_MOD_SYSTEM_PROMPT }, @@ -720,13 +720,20 @@ async function callOpenAI( method: 'POST', headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}` }, body: JSON.stringify({ - model, + model, // gpt-5.4-mini (default) / gpt-5.4-nano / gpt-5.4 — see devvit.json openaiModel response_format: { type: 'json_object' }, messages, - // Newer OpenAI models (gpt-5.x family) require max_completion_tokens (not max_tokens) - // and only accept the default temperature, so we don't send `temperature`. Determinism - // is carried by response_format: json_object + the strict prompt + few-shot examples. - max_completion_tokens: 700, + // Tuned for what this call is: a mechanical NL → strict-JSON translation. + // reasoning_effort: 'none' — no hidden reasoning needed; keeps it fast and stops the + // token budget being eaten by reasoning (gpt-5.4 family value; + // older models call this 'minimal'). Measured ~1.1–1.4s. + // verbosity: 'low' — terse JSON, no commentary. + // max_completion_tokens — a compiled rule + a clarification fit well under 600. + // (no `temperature` — the gpt-5.x family only accepts the default; max_tokens isn't + // supported on these models, use max_completion_tokens.) + reasoning_effort: 'none', + verbosity: 'low', + max_completion_tokens: 600, }), });