Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions devvit.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,22 @@
},
"openaiModel": {
"type": "select",
"label": "OpenAI model (developer-set default)",
"label": "OpenAI model for rule compilation (reasoning_effort: none, verbosity: low)",
"options": [
{
"label": "gpt-5.4-nano (recommended — fast ~1.4s, cheapest, 7/7 on the rule-compile smoke test)",
"label": "gpt-5.4-mini (recommended — fastest ~1.1s, 7/7 on the rule-compile smoke test)",
"value": "gpt-5.4-mini"
},
{
"label": "gpt-5.4-nano (cheapest, ~1.4s, also 7/7)",
"value": "gpt-5.4-nano"
},
{
"label": "gpt-5.4-mini (fast ~1.2s, equal quality, a bit pricier — pick this if cost is free for you)",
"value": "gpt-5.4-mini"
"label": "gpt-5.4 (full — slower ~1.8s, more cautious about ambiguous rules; no quality gain for this task)",
"value": "gpt-5.4"
}
],
"defaultValue": "gpt-5.4-nano"
"defaultValue": "gpt-5.4-mini"
}
},
"subreddit": {
Expand Down
27 changes: 23 additions & 4 deletions scripts/openai-smoketest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
//
// NOT part of `npm run check` / CI (needs a real key).
//
// IMPORTANT: keep the request payload below in sync with `callOpenAI` in
// src/server/index.ts (same response_format, max_completion_tokens, no temperature).
// IMPORTANT: the default request config below mirrors `callOpenAI` in
// src/server/index.ts (response_format: json_object, reasoning_effort: 'none',
// verbosity: 'low', max_completion_tokens: 600, no temperature). Env vars
// REASONING_EFFORT / VERBOSITY / MAX_COMPLETION_TOKENS override for experiments.

import { readFileSync, existsSync } from 'node:fs';
import { join } from 'node:path';
Expand All @@ -43,6 +45,14 @@ const MODELS = (process.env.OPENAI_MODELS?.trim() || process.env.OPENAI_MODEL?.t
.split(',')
.map((s) => s.trim())
.filter(Boolean);
// Request tuning — DEFAULTS MATCH callOpenAI() in src/server/index.ts. Override
// via env to experiment. `none` reasoning (gpt-5.4 family value; older models
// use `minimal`) suits this mechanical NL→JSON task: fast, no token-budget burn.
// Set REASONING_EFFORT='' (or VERBOSITY='') to omit that param entirely.

/**
 * Resolves an optional string request parameter from an environment variable.
 *
 * @param raw - The raw env value (`undefined` when the variable is not set).
 * @param fallback - Value used when the variable is unset or whitespace-only.
 * @returns `undefined` when `raw` is exactly the empty string (meaning "do not
 *   send this parameter"), otherwise the trimmed value or `fallback`.
 */
function envRequestParam(raw: string | undefined, fallback: string): string | undefined {
  // An explicitly empty variable is the opt-out signal; unset means "use the default".
  if (raw === '') return undefined;
  return raw?.trim() || fallback;
}

/**
 * Parses a positive-integer setting from an environment variable.
 *
 * @param raw - The raw env value (`undefined` when the variable is not set).
 * @param fallback - Value used when `raw` is missing or not a positive integer.
 * @returns The parsed integer, or `fallback` for unset, empty, non-numeric,
 *   zero, negative, fractional or non-finite input.
 */
function envPositiveInt(raw: string | undefined, fallback: number): number {
  const parsed = Number(raw);
  // Number.isInteger rejects NaN, ±Infinity and fractions in a single check.
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

const REASONING_EFFORT = envRequestParam(process.env.REASONING_EFFORT, 'none'); // none | low | medium | high | xhigh
const VERBOSITY = envRequestParam(process.env.VERBOSITY, 'low'); // low | medium | high
// A token limit must be a positive integer; anything else falls back to the default.
const MAX_COMPLETION_TOKENS = envPositiveInt(process.env.MAX_COMPLETION_TOKENS, 600);

if (!API_KEY) {
console.error(
Expand All @@ -63,7 +73,7 @@ const CASES: Case[] = [
rule: 'If a post title is at least 12 characters and more than 70% capital letters, add the flair "Edit your title?"',
expect: 'rule',
},
{ rule: 'Report comments that are over 60 characters and almost entirely uppercase', expect: 'rule' },
{ rule: 'Report comments over 60 characters where more than 90% of the letters are uppercase', expect: 'rule' },
{
rule: 'Send to the mod queue any post linking to a known URL shortener (bit.ly, tinyurl.com, t.co)',
expect: 'rule',
Expand All @@ -90,11 +100,20 @@ async function compile(model: string, userRule: string): Promise<ApiResult> {
}
messages.push({ role: 'user', content: userRule });

const body: Record<string, unknown> = {
model,
response_format: { type: 'json_object' },
messages,
max_completion_tokens: MAX_COMPLETION_TOKENS,
};
if (REASONING_EFFORT) body.reasoning_effort = REASONING_EFFORT;
if (VERBOSITY) body.verbosity = VERBOSITY;

const t0 = performance.now();
const resp = await fetch('https://api.openai.com/v1/chat/completions', {
method: 'POST',
headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${API_KEY}` },
body: JSON.stringify({ model, response_format: { type: 'json_object' }, messages, max_completion_tokens: 700 }),
body: JSON.stringify(body),
});
if (!resp.ok) {
let code: string | undefined;
Expand Down
21 changes: 14 additions & 7 deletions src/server/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ app.post('/internal/form/compose-rule-submit', async (c) => {
schemaVersion: '1.0.0',
bundleVersion: 0,
compiledAt: Date.now(),
llmModel: ((await settings.get('openaiModel')) as string) || 'gpt-5.4-nano',
llmModel: ((await settings.get('openaiModel')) as string) || 'gpt-5.4-mini',
llmTokensIn: 0,
llmTokensOut: 0,
rules: [],
Expand Down Expand Up @@ -700,7 +700,7 @@ async function callOpenAI(
const apiKey = (subKey?.trim() || globalKey || '').trim();
if (!apiKey) throw new Error('no_key');

const model = ((await settings.get('openaiModel')) as string) || 'gpt-5.4-nano';
const model = ((await settings.get('openaiModel')) as string) || 'gpt-5.4-mini';

const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [
{ role: 'system', content: VIBE_MOD_SYSTEM_PROMPT },
Expand All @@ -720,13 +720,20 @@ async function callOpenAI(
method: 'POST',
headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}` },
body: JSON.stringify({
model,
model, // gpt-5.4-mini (default) / gpt-5.4-nano / gpt-5.4 — see devvit.json openaiModel
response_format: { type: 'json_object' },
messages,
// Newer OpenAI models (gpt-5.x family) require max_completion_tokens (not max_tokens)
// and only accept the default temperature, so we don't send `temperature`. Determinism
// is carried by response_format: json_object + the strict prompt + few-shot examples.
max_completion_tokens: 700,
// Tuned for what this call is: a mechanical NL → strict-JSON translation.
// reasoning_effort: 'none' — no hidden reasoning needed; keeps it fast and stops the
// token budget being eaten by reasoning (gpt-5.4 family value;
// older models call this 'minimal'). Measured ~1.1–1.4s.
// verbosity: 'low' — terse JSON, no commentary.
// max_completion_tokens — a compiled rule + a clarification fit well under 600.
// (no `temperature` — the gpt-5.x family only accepts the default; max_tokens isn't
// supported on these models, use max_completion_tokens.)
reasoning_effort: 'none',
verbosity: 'low',
max_completion_tokens: 600,
}),
Comment on lines 722 to 737
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Hardcoding reasoning_effort and verbosity in the request body can lead to API errors (HTTP 400) if the model stored in the user's settings does not support these parameters. This is a significant risk for existing installations where an older model (like gpt-4o-mini or gpt-5-mini) might still be configured in the subreddit settings.

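Additionally, verbosity is not accepted by every model: it is a Chat Completions parameter introduced with the GPT-5 family, so older models reject it as an unrecognized argument. Likewise, reasoning_effort is only valid for reasoning-capable models (like the o1/o3 family and GPT-5), and the set of accepted values (e.g. 'none' vs. 'minimal') differs between model generations, so an unsupported value can also cause an error.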

It is safer to construct the request body conditionally, ensuring these parameters are only sent to models known to support them (the gpt-5.4 family in this context), similar to the defensive implementation in the smoketest script.

    body: JSON.stringify({
      model, // gpt-5.4-mini (default) / gpt-5.4-nano / gpt-5.4 — see devvit.json openaiModel
      response_format: { type: 'json_object' },
      messages,
      // Tuned for what this call is: a mechanical NL → strict-JSON translation.
      //   reasoning_effort: 'none'  — no hidden reasoning needed; keeps it fast and stops the
      //                               token budget being eaten by reasoning (gpt-5.4 family value;
      //                               older models call this 'minimal'). Measured ~1.1–1.4s.
      //   verbosity: 'low'          — terse JSON, no commentary.
      //   max_completion_tokens     — a compiled rule + a clarification fit well under 600.
      //   (no `temperature` — the gpt-5.x family only accepts the default; max_tokens isn't
      //    supported on these models, use max_completion_tokens.)
      max_completion_tokens: 600,
      ...(model.startsWith('gpt-5.4') ? { reasoning_effort: 'none', verbosity: 'low' } : {}),
    }),

});

Expand Down