From 301bae3031cc02cf74d14dde8cd6b3a33d8b84e0 Mon Sep 17 00:00:00 2001
From: ComBba <app.2weeks@gmail.com>
Date: Tue, 12 May 2026 19:54:44 +0900
Subject: [PATCH] =?UTF-8?q?feat(openai):=20tune=20the=20compile=20call=20f?=
 =?UTF-8?q?or=20the=20task=20=E2=80=94=20reasoning=5Feffort:=20none,=20ver?=
 =?UTF-8?q?bosity:=20low;=20default=20gpt-5.4-mini?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The single LLM call vibe-mod makes (callOpenAI: NL rule → strict JSON, or a
clarification) is mechanical translation, not reasoning. Configure it as such:
  - reasoning_effort: 'none'  — no hidden reasoning; fast (~1.2–1.4s) and keeps
    the token budget from being eaten by reasoning. NB: 'none' is specific to
    the gpt-5.4 family. 5.4 rejects the gpt-5.0/5.1-era value 'minimal'
    ("Supported values are: 'none','low','medium','high','xhigh'"), while
    gpt-5-mini still expects 'minimal'. That is fine here because the model
    options are restricted to the 5.4 family.
  - verbosity: 'low'          — terse JSON, no commentary.
  - max_completion_tokens: 600 (down from 700) — a compiled rule + a clarification
    fit comfortably; observed worst case ~150 out tokens.
  - still no `temperature` (gpt-5.x only accepts the default).

devvit.json openaiModel options trimmed/relabelled to the three viable picks
with measured numbers, default switched gpt-5.4-nano → gpt-5.4-mini:
  gpt-5.4-mini  7/7  median ~1.2s  max ~1.8s   ← recommended, fastest
  gpt-5.4-nano  7/7  median ~1.5s  max ~1.7s   ← cheapest
  gpt-5.4       7/7  median ~2.1s  max ~4.2s   ← full; slower, more cautious on
                                                ambiguous rules, no quality gain
(index.ts fallback model also updated nano → mini.)

smoketest: default request config now mirrors callOpenAI (reasoning_effort/
verbosity/max_completion_tokens), with REASONING_EFFORT/VERBOSITY/
MAX_COMPLETION_TOKENS env overrides for experiments. (Per-call latency and the
OPENAI_MODELS=a,b,c comparison table were added in earlier commits.) Tightened
one test case to an explicit threshold ("more than 90% of the letters are
uppercase") so it doesn't penalise the more-cautious model for asking a
clarifying question — now 7/7 on all three.

tsc/lint/format/tests(152)/acceptance(4/4) all green; smoke test 7/7 × 3 models.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 devvit.json                 | 14 +++++++++-----
 scripts/openai-smoketest.ts | 27 +++++++++++++++++++++++----
 src/server/index.ts         | 21 ++++++++++++++-------
 3 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/devvit.json b/devvit.json
index 14d24a8..ecb5383 100644
--- a/devvit.json
+++ b/devvit.json
@@ -18,18 +18,22 @@
       },
       "openaiModel": {
         "type": "select",
-        "label": "OpenAI model (developer-set default)",
+        "label": "OpenAI model for rule compilation (reasoning_effort: none, verbosity: low)",
         "options": [
           {
-            "label": "gpt-5.4-nano (recommended — fast ~1.4s, cheapest, 7/7 on the rule-compile smoke test)",
+            "label": "gpt-5.4-mini (recommended — fastest ~1.1s, 7/7 on the rule-compile smoke test)",
+            "value": "gpt-5.4-mini"
+          },
+          {
+            "label": "gpt-5.4-nano (cheapest, ~1.4s, also 7/7)",
             "value": "gpt-5.4-nano"
           },
           {
-            "label": "gpt-5.4-mini (fast ~1.2s, equal quality, a bit pricier — pick this if cost is free for you)",
-            "value": "gpt-5.4-mini"
+            "label": "gpt-5.4 (full — slower ~1.8s, more cautious about ambiguous rules; no quality gain for this task)",
+            "value": "gpt-5.4"
           }
         ],
-        "defaultValue": "gpt-5.4-nano"
+        "defaultValue": "gpt-5.4-mini"
       }
     },
     "subreddit": {
diff --git a/scripts/openai-smoketest.ts b/scripts/openai-smoketest.ts
index cb35687..594f4eb 100644
--- a/scripts/openai-smoketest.ts
+++ b/scripts/openai-smoketest.ts
@@ -17,8 +17,10 @@
 //
 // NOT part of `npm run check` / CI (needs a real key).
 //
-// IMPORTANT: keep the request payload below in sync with `callOpenAI` in
-// src/server/index.ts (same response_format, max_completion_tokens, no temperature).
+// IMPORTANT: the default request config below mirrors `callOpenAI` in
+// src/server/index.ts (response_format: json_object, reasoning_effort: 'none',
+// verbosity: 'low', max_completion_tokens: 600, no temperature). Env vars
+// REASONING_EFFORT / VERBOSITY / MAX_COMPLETION_TOKENS override for experiments.
 
 import { readFileSync, existsSync } from 'node:fs';
 import { join } from 'node:path';
@@ -43,6 +45,14 @@ const MODELS = (process.env.OPENAI_MODELS?.trim() || process.env.OPENAI_MODEL?.t
   .split(',')
   .map((s) => s.trim())
   .filter(Boolean);
+// Request tuning — DEFAULTS MATCH callOpenAI() in src/server/index.ts. Override
+// via env to experiment. `none` reasoning (gpt-5.4 family value; older models
+// use `minimal`) suits this mechanical NL→JSON task: fast, no token-budget burn.
+// Set REASONING_EFFORT='' or VERBOSITY='' to omit that param from the request.
+const REASONING_EFFORT =
+  process.env.REASONING_EFFORT === '' ? undefined : process.env.REASONING_EFFORT?.trim() || 'none'; // none | low | medium | high | xhigh
+const VERBOSITY = process.env.VERBOSITY === '' ? undefined : process.env.VERBOSITY?.trim() || 'low'; // low | medium | high
+const MAX_COMPLETION_TOKENS = Number(process.env.MAX_COMPLETION_TOKENS) || 600;
 
 if (!API_KEY) {
   console.error(
@@ -63,7 +73,7 @@ const CASES: Case[] = [
     rule: 'If a post title is at least 12 characters and more than 70% capital letters, add the flair "Edit your title?"',
     expect: 'rule',
   },
-  { rule: 'Report comments that are over 60 characters and almost entirely uppercase', expect: 'rule' },
+  { rule: 'Report comments over 60 characters where more than 90% of the letters are uppercase', expect: 'rule' },
   {
     rule: 'Send to the mod queue any post linking to a known URL shortener (bit.ly, tinyurl.com, t.co)',
     expect: 'rule',
@@ -90,11 +100,20 @@ async function compile(model: string, userRule: string): Promise<ApiResult> {
   }
   messages.push({ role: 'user', content: userRule });
 
+  const body: Record<string, unknown> = {
+    model,
+    response_format: { type: 'json_object' },
+    messages,
+    max_completion_tokens: MAX_COMPLETION_TOKENS,
+  };
+  if (REASONING_EFFORT) body.reasoning_effort = REASONING_EFFORT;
+  if (VERBOSITY) body.verbosity = VERBOSITY;
+
   const t0 = performance.now();
   const resp = await fetch('https://api.openai.com/v1/chat/completions', {
     method: 'POST',
     headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${API_KEY}` },
-    body: JSON.stringify({ model, response_format: { type: 'json_object' }, messages, max_completion_tokens: 700 }),
+    body: JSON.stringify(body),
   });
   if (!resp.ok) {
     let code: string | undefined;
diff --git a/src/server/index.ts b/src/server/index.ts
index 04912a1..243e748 100644
--- a/src/server/index.ts
+++ b/src/server/index.ts
@@ -294,7 +294,7 @@ app.post('/internal/form/compose-rule-submit', async (c) => {
         schemaVersion: '1.0.0',
         bundleVersion: 0,
         compiledAt: Date.now(),
-        llmModel: ((await settings.get('openaiModel')) as string) || 'gpt-5.4-nano',
+        llmModel: ((await settings.get('openaiModel')) as string) || 'gpt-5.4-mini',
         llmTokensIn: 0,
         llmTokensOut: 0,
         rules: [],
@@ -700,7 +700,7 @@ async function callOpenAI(
   const apiKey = (subKey?.trim() || globalKey || '').trim();
   if (!apiKey) throw new Error('no_key');
 
-  const model = ((await settings.get('openaiModel')) as string) || 'gpt-5.4-nano';
+  const model = ((await settings.get('openaiModel')) as string) || 'gpt-5.4-mini';
 
   const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [
     { role: 'system', content: VIBE_MOD_SYSTEM_PROMPT },
@@ -720,13 +720,20 @@ async function callOpenAI(
     method: 'POST',
     headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}` },
     body: JSON.stringify({
-      model,
+      model, // gpt-5.4-mini (default) / gpt-5.4-nano / gpt-5.4 — see devvit.json openaiModel
       response_format: { type: 'json_object' },
       messages,
-      // Newer OpenAI models (gpt-5.x family) require max_completion_tokens (not max_tokens)
-      // and only accept the default temperature, so we don't send `temperature`. Determinism
-      // is carried by response_format: json_object + the strict prompt + few-shot examples.
-      max_completion_tokens: 700,
+      // Tuned for what this call is: a mechanical NL → strict-JSON translation.
+      //   reasoning_effort: 'none'  — no hidden reasoning needed; keeps it fast and stops the
+      //                               token budget being eaten by reasoning (gpt-5.4 family value;
+      //                               older models call this 'minimal'). Measured ~1.1–1.4s.
+      //   verbosity: 'low'          — terse JSON, no commentary.
+      //   max_completion_tokens     — a compiled rule + a clarification fit well under 600.
+      //   (no `temperature` — the gpt-5.x family only accepts the default; max_tokens isn't
+      //    supported on these models, use max_completion_tokens.)
+      reasoning_effort: 'none',
+      verbosity: 'low',
+      max_completion_tokens: 600,
     }),
   });