diff --git a/devvit.json b/devvit.json
index 14d24a8..ecb5383 100644
--- a/devvit.json
+++ b/devvit.json
@@ -18,18 +18,22 @@
       },
       "openaiModel": {
         "type": "select",
-        "label": "OpenAI model (developer-set default)",
+        "label": "OpenAI model for rule compilation (reasoning_effort: none, verbosity: low)",
         "options": [
           {
-            "label": "gpt-5.4-nano (recommended — fast ~1.4s, cheapest, 7/7 on the rule-compile smoke test)",
+            "label": "gpt-5.4-mini (recommended — fastest ~1.1s, 7/7 on the rule-compile smoke test)",
+            "value": "gpt-5.4-mini"
+          },
+          {
+            "label": "gpt-5.4-nano (cheapest, ~1.4s, also 7/7)",
             "value": "gpt-5.4-nano"
           },
           {
-            "label": "gpt-5.4-mini (fast ~1.2s, equal quality, a bit pricier — pick this if cost is free for you)",
-            "value": "gpt-5.4-mini"
+            "label": "gpt-5.4 (full — slower ~1.8s, more cautious about ambiguous rules; no quality gain for this task)",
+            "value": "gpt-5.4"
           }
         ],
-        "defaultValue": "gpt-5.4-nano"
+        "defaultValue": "gpt-5.4-mini"
       }
     },
     "subreddit": {
diff --git a/scripts/openai-smoketest.ts b/scripts/openai-smoketest.ts
index cb35687..594f4eb 100644
--- a/scripts/openai-smoketest.ts
+++ b/scripts/openai-smoketest.ts
@@ -17,8 +17,10 @@
 //
 // NOT part of `npm run check` / CI (needs a real key).
 //
-// IMPORTANT: keep the request payload below in sync with `callOpenAI` in
-// src/server/index.ts (same response_format, max_completion_tokens, no temperature).
+// IMPORTANT: the default request config below mirrors `callOpenAI` in
+// src/server/index.ts (response_format: json_object, reasoning_effort: 'none',
+// verbosity: 'low', max_completion_tokens: 600, no temperature). The env vars
+// REASONING_EFFORT / VERBOSITY / MAX_COMPLETION_TOKENS override these for experiments.
 
 import { readFileSync, existsSync } from 'node:fs';
 import { join } from 'node:path';
@@ -43,6 +45,14 @@ const MODELS = (process.env.OPENAI_MODELS?.trim() || process.env.OPENAI_MODEL?.t
   .split(',')
   .map((s) => s.trim())
   .filter(Boolean);
+// Request tuning — DEFAULTS MATCH callOpenAI() in src/server/index.ts. Override
+// via env to experiment. `none` reasoning (gpt-5.4 family value; older models
+// use `minimal`) suits this mechanical NL→JSON task: fast, no token-budget burn.
+// Set REASONING_EFFORT='' (or VERBOSITY='') to omit that param entirely.
+const REASONING_EFFORT =
+  process.env.REASONING_EFFORT === '' ? undefined : process.env.REASONING_EFFORT?.trim() || 'none'; // none | low | medium | high | xhigh
+const VERBOSITY = process.env.VERBOSITY === '' ? undefined : process.env.VERBOSITY?.trim() || 'low'; // low | medium | high
+const MAX_COMPLETION_TOKENS = Number(process.env.MAX_COMPLETION_TOKENS) || 600;
 
 if (!API_KEY) {
   console.error(
@@ -63,7 +73,7 @@ const CASES: Case[] = [
     rule: 'If a post title is at least 12 characters and more than 70% capital letters, add the flair "Edit your title?"',
     expect: 'rule',
   },
-  { rule: 'Report comments that are over 60 characters and almost entirely uppercase', expect: 'rule' },
+  { rule: 'Report comments over 60 characters where more than 90% of the letters are uppercase', expect: 'rule' },
   {
     rule: 'Send to the mod queue any post linking to a known URL shortener (bit.ly, tinyurl.com, t.co)',
     expect: 'rule',
@@ -90,11 +100,20 @@ async function compile(model: string, userRule: string): Promise<ApiResult> {
   }
   messages.push({ role: 'user', content: userRule });
 
+  const body: Record<string, unknown> = {
+    model,
+    response_format: { type: 'json_object' },
+    messages,
+    max_completion_tokens: MAX_COMPLETION_TOKENS,
+  };
+  if (REASONING_EFFORT) body.reasoning_effort = REASONING_EFFORT;
+  if (VERBOSITY) body.verbosity = VERBOSITY;
+
   const t0 = performance.now();
   const resp = await fetch('https://api.openai.com/v1/chat/completions', {
     method: 'POST',
     headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${API_KEY}` },
-    body: JSON.stringify({ model, response_format: { type: 'json_object' }, messages, max_completion_tokens: 700 }),
+    body: JSON.stringify(body),
   });
   if (!resp.ok) {
     let code: string | undefined;
diff --git a/src/server/index.ts b/src/server/index.ts
index 04912a1..243e748 100644
--- a/src/server/index.ts
+++ b/src/server/index.ts
@@ -294,7 +294,7 @@ app.post('/internal/form/compose-rule-submit', async (c) => {
         schemaVersion: '1.0.0',
         bundleVersion: 0,
         compiledAt: Date.now(),
-        llmModel: ((await settings.get('openaiModel')) as string) || 'gpt-5.4-nano',
+        llmModel: ((await settings.get('openaiModel')) as string) || 'gpt-5.4-mini',
         llmTokensIn: 0,
         llmTokensOut: 0,
         rules: [],
@@ -700,7 +700,7 @@ async function callOpenAI(
   const apiKey = (subKey?.trim() || globalKey || '').trim();
   if (!apiKey) throw new Error('no_key');
 
-  const model = ((await settings.get('openaiModel')) as string) || 'gpt-5.4-nano';
+  const model = ((await settings.get('openaiModel')) as string) || 'gpt-5.4-mini';
 
   const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [
     { role: 'system', content: VIBE_MOD_SYSTEM_PROMPT },
@@ -720,13 +720,20 @@ async function callOpenAI(
     method: 'POST',
     headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}` },
     body: JSON.stringify({
-      model,
+      model, // gpt-5.4-mini (default) / gpt-5.4-nano / gpt-5.4 — see devvit.json openaiModel
       response_format: { type: 'json_object' },
       messages,
-      // Newer OpenAI models (gpt-5.x family) require max_completion_tokens (not max_tokens)
-      // and only accept the default temperature, so we don't send `temperature`. Determinism
-      // is carried by response_format: json_object + the strict prompt + few-shot examples.
-      max_completion_tokens: 700,
+      // Tuned for what this call is: a mechanical NL → strict-JSON translation.
+      //   reasoning_effort: 'none'  — no hidden reasoning needed; keeps it fast and stops the
+      //                               token budget being eaten by reasoning (gpt-5.4 family value;
+      //                               older models call this 'minimal'). Measured ~1.1–1.4s.
+      //   verbosity: 'low'          — terse JSON, no commentary.
+      //   max_completion_tokens     — a compiled rule + a clarification fit well under 600.
+      //   (no `temperature` — the gpt-5.x family only accepts the default; max_tokens isn't
+      //    supported on these models, use max_completion_tokens.)
+      reasoning_effort: 'none',
+      verbosity: 'low',
+      max_completion_tokens: 600,
     }),
   });