Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 58 additions & 28 deletions scripts/chrome-reddit-verify-phase17b.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,19 @@
# Directory under ROOT that holds the saved browser state file below.
AUTH_DIR = ROOT / "playwright" / ".auth"
# Saved state for reddit.com; main() hands this path to the browser context
# as `storage_state`.
STATE = AUTH_DIR / "reddit-com.json"
# Headless unless HEADLESS is set to anything other than "1" (default "1").
HEADLESS = os.environ.get("HEADLESS", "1") == "1"
# Opt-in recording mode: only FULLSCREEN=1 enables it. main() then launches
# Chrome with --start-maximized and creates the context with no fixed viewport.
FULLSCREEN = os.environ.get("FULLSCREEN", "0") == "1"
def _safe_int(env_name: str, default: int) -> int:
"""Parse an int env var; on garbage input fall back to the default
rather than crashing the recording session (CodeRabbit #2 PR #49)."""
raw = os.environ.get(env_name, str(default))
try:
return int(raw)
except (ValueError, TypeError):
print(f"[verify] WARN: {env_name}={raw!r} is not an int — using default {default}")
return default


SLOW_MO = _safe_int("SLOW_MO", 0) # ms between actions, 0 = no slow-down
# Value of REDDIT_SUB (default "SocialSeeding"); main() records it on the
# RunReport as `sub`.
SUB = os.environ.get("REDDIT_SUB", "SocialSeeding")
# Ambiguous input designed to trigger the clarify path on the first compile.
COMPOSE_INPUT = os.environ.get(
Expand Down Expand Up @@ -236,19 +249,34 @@ async def main() -> None:
report = RunReport(sub=SUB, input=COMPOSE_INPUT)

async with async_playwright() as p:
launch_args = ["--disable-blink-features=AutomationControlled"]
if FULLSCREEN:
# macOS-friendly recording mode: `--start-maximized` opens the
# Chrome window at the user's full screen size (with the address
# bar visible — useful for "this is reddit.com" demo context).
# Pair with `no_viewport=True` so the page matches the window
# exactly, and let Retina DPR happen naturally for sharp text.
# `--start-fullscreen` is unreliable on macOS through Playwright
# (frequently ignored) and `--kiosk` hides the URL bar.
launch_args += ["--start-maximized"]
browser = await p.chromium.launch(
headless=HEADLESS,
args=["--disable-blink-features=AutomationControlled"],
slow_mo=SLOW_MO,
args=launch_args,
)
ctx = await browser.new_context(
ctx_kwargs = dict(
storage_state=str(STATE),
viewport={"width": 1600, "height": 1000},
user_agent=(
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
),
locale="en-US",
)
if FULLSCREEN:
ctx_kwargs["no_viewport"] = True # let the page match the maximized window
else:
ctx_kwargs["viewport"] = {"width": 1600, "height": 1000}
ctx = await browser.new_context(**ctx_kwargs)
page = await ctx.new_page()

async def shot(name: str) -> str:
Expand Down Expand Up @@ -311,16 +339,14 @@ async def shot(name: str) -> str:
extra={"form": clarify_form, "round_visible": round_visible},
))

if not clarify_pass:
# The model didn't ask for clarification on this input — the
# Confirm form should have opened directly. Fall through to
# confirm-form check.
print(f"[verify] no clarify modal — assuming confirm path.")
else:
# Pick a suggested option via direct DOM mutation (the Devvit
# `faceplate-select` Lit element doesn't respond to a vanilla
# locator.click, but assigning .value + dispatching change/input
# events drives the same code path the human click would).
# Multi-round clarify loop — if the LLM asks again after our first
# answer, pick another option and re-compile, up to MAX_CLARIFY_TURNS.
# Devvit caps at MAX_CLARIFY_TURNS=3 server-side so we'll bottom out
# one way or another.
clarify_rounds_handled = 0
while clarify_pass and clarify_rounds_handled < 3:
clarify_rounds_handled += 1
print(f"[verify] handling clarify round #{clarify_rounds_handled}")
picker_result = {}
try:
picker_result = await page.evaluate(
Expand Down Expand Up @@ -375,29 +401,33 @@ async def shot(name: str) -> str:
except Exception as e:
picker_result = {"ok": False, "exception": str(e)}

# Only the first picker iteration counts toward the official
# `clarify-select-pick` step — later rounds get suffixed names
# so the report.overall() check stays meaningful.
step_name = "clarify-select-pick" if clarify_rounds_handled == 1 else f"clarify-select-pick-r{clarify_rounds_handled}"
report.add(StepResult(
"clarify-select-pick",
step_name,
bool(picker_result.get('ok')),
f"picked={picker_result.get('picked')} controls_seen={len(picker_result.get('found', []))} opts_seen={len(picker_result.get('opts', []))}",
extra={"picker": picker_result},
))

# Mark PASS when we successfully drove some select-like control.
# The form's defaultValue already pre-selects opts[0], so even
# if our picker only reaffirmed it the next compile will see
# this value — that's still a real round-trip.
if 'clarify-select-pick' not in {s.name for s in report.steps}:
report.add(StepResult(
"clarify-select-pick",
bool(picker_ok),
f"picker_ok={picker_ok} picked_value={picked_val!r}",
))

await shot("05-clarify-picked")
await shot(f"05-clarify-r{clarify_rounds_handled}-picked")
recompiled = await submit_form(page, r"recompile|re-compile|compile")
report.add(StepResult("clarify-recompile-submit", recompiled, "clicked Re-compile"))
recompile_step = "clarify-recompile-submit" if clarify_rounds_handled == 1 else f"clarify-recompile-submit-r{clarify_rounds_handled}"
report.add(StepResult(recompile_step, recompiled, "clicked Re-compile"))
await page.wait_for_timeout(8_000)
await shot("06-after-recompile")
await shot(f"06-after-recompile-r{clarify_rounds_handled}")

# Re-dump the form so the loop condition reflects what came
# back. If the LLM is satisfied, the next form is the Confirm
# form and clarify_pass flips to False, breaking the loop.
clarify_form = await dump_form(page)
field_names = {f.get('name', '') for f in clarify_form.get('fields', [])}
still_clarify = 'clarificationTurn' in field_names
still_select = any('select' in (f.get('type') or '') for f in clarify_form.get('fields', []))
clarify_pass = still_clarify and still_select
print(f"[verify] round #{clarify_rounds_handled} done. still_clarify={still_clarify}")

# ── 4 · expect Confirm form ─────────────────────────────────────
confirm_form = await dump_form(page)
Expand Down
55 changes: 40 additions & 15 deletions src/server/routes-compose.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -386,18 +386,19 @@ describe('POST /internal/form/compose-rule-submit', () => {
expect(ruleField.disabled).toBeFalsy();
});

// ── Phase 1.6 (audit finding #6): success toast carries rule summary + menu hint ──
// After Phase 1.7b, the toast comes from /compose-confirm-submit (not the
// compose-rule-submit handler), but the wording lives in
// persistRuleAndStartDryRun() so the assertions still apply to the saved toast.
it('success toast includes a 1-line rule summary and the View-rules menu hint', async () => {
// ── Phase 2c demo-recording UX clean-up: success toast is short ──
// The previous wording packed 4 clauses into one line and Devvit's toast
// truncated mid-sentence in the recording. The compose-confirm form
// already shows the full rule + cost so the toast just needs to confirm
// what happened.
it('success toast is short and names the rule that was saved', async () => {
asMod();
fakeSettings.get.mockImplementation(async (k: string) => (k === 'openaiApiKey' ? 'sk-dev' : undefined));
fakeFetch.mockResolvedValue(openaiResponse(VALID_COMPILED));
const { saveBody } = await compileAndConfirm(VALID_COMPILED.sourceNL, false);
expect(saveBody.showToast.appearance).toBe('success');
expect(saveBody.showToast.text).toMatch(/→\s*post[\w+]*:\s*modqueue/);
expect(saveBody.showToast.text).toContain('vibe-mod: View rules + log');
expect(saveBody.showToast.text).toContain('Flag low-karma posts');
expect(saveBody.showToast.text.length).toBeLessThan(120); // Devvit toast budget
});

it('compiles a valid rule → shows confirm form → on Save: stores draft, bumps counter, schedules dry-run', async () => {
Expand All @@ -407,24 +408,48 @@ describe('POST /internal/form/compose-rule-submit', () => {
);
fakeFetch.mockResolvedValue(openaiResponse(VALID_COMPILED));

const { confirmFormBody, saveBody } = await compileAndConfirm(VALID_COMPILED.sourceNL, false);
// Phase 1.7b (audit finding #2): compile-rule-submit returns a confirm form,
// not a success toast. Persistence happens on /compose-confirm-submit.
// Phase 1.7b + Phase 2c (audit finding #2): compose-rule-submit returns
// a confirm form, not a success toast. The form now carries a single
// short pendingId — the actual compile state lives under a Redis key
// (audit finding #B from the demo recording — internal carriers were
// bloating the modal). We run the two steps manually here so we can
// assert on the pending entry between them.
const composeRes = await call('/internal/form/compose-rule-submit', {
rule: VALID_COMPILED.sourceNL,
allowGuarded: false,
});
const confirmFormBody = await composeRes.json();
expect(confirmFormBody.showForm.name).toBe('composeConfirmForm');
expect(confirmFormBody.showForm.form.title).toContain('Flag low-karma posts');
// The form embeds the compiled rule + token counts as state carriers.
const fieldsByName = Object.fromEntries(
(confirmFormBody.showForm.form.fields as Array<{ name: string; defaultValue: unknown }>).map((f) => [
f.name,
f.defaultValue,
]),
);
expect(fieldsByName.serializedRule).toContain('r_low_karma_flag');
expect(fieldsByName.llmModel).toBe('gpt-5.4-nano');

// After Save: draft persisted, counter bumped, dry-run scheduled.
expect(fieldsByName.compiledSummary).toContain('Flag low-karma posts');
expect(typeof fieldsByName.pendingId).toBe('string');
expect((fieldsByName.pendingId as string).length).toBeGreaterThan(0);
// No more raw internal carriers in the modal.
expect(fieldsByName.serializedRule).toBeUndefined();
expect(fieldsByName.llmModel).toBeUndefined();
expect(fieldsByName.usingBYOK).toBeUndefined();
// The pending entry should round-trip the model the test stubbed.
const pendingJson = JSON.parse((await fakeRedis.get(`testsub:compose:pending:${fieldsByName.pendingId}`))!);
expect(pendingJson.llmModel).toBe('gpt-5.4-nano');
expect(pendingJson.validated.id).toBe('r_low_karma_flag');

// Now run Save — pending entry gets consumed, draft persisted.
const savePayload: Record<string, unknown> = {};
for (const f of confirmFormBody.showForm.form.fields as Array<{ name: string; defaultValue: unknown }>) {
savePayload[f.name] = f.defaultValue;
}
savePayload.editInsteadOfSave = false;
const saveRes = await call('/internal/form/compose-confirm-submit', savePayload);
const saveBody = await saveRes.json();
expect(saveBody.showToast.appearance).toBe('success');
expect(saveBody.showToast.text).toContain('Flag low-karma posts');
expect(await fakeRedis.get(`testsub:compose:pending:${fieldsByName.pendingId}`)).toBeUndefined();

const draft = JSON.parse((await fakeRedis.get('testsub:rules:draft'))!);
expect(draft.rules).toHaveLength(1);
Expand Down
56 changes: 37 additions & 19 deletions src/server/routes-dashboard.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,18 @@ async function seedAudit(actionId: string, fields: Record<string, string>, score
await fakeRedis.hSet(`testsub:audit:${actionId}`, fields);
}

// Helper: flatten everything the dashboard form displays into one
// newline-separated string — the form description first, then each field's
// label followed by its default value. Assertions can then search the whole
// Phase 2c multi-block layout the same way they used to search the single
// `description` string.
function dashTexts(body: any): string {
  const form = body.showForm.form;
  // The description always comes first; a missing one contributes ''.
  const chunks: string[] = [form.description ?? ''];
  for (const field of form.fields as Array<{ label?: string; defaultValue?: unknown }>) {
    const heading = field.label ?? '';
    const value = String(field.defaultValue ?? '');
    chunks.push(`${heading}\n${value}`);
  }
  return chunks.join('\n');
}

describe('POST /internal/menu/dashboard', () => {
it('rejects a non-moderator', async () => {
const body = await (
Expand All @@ -43,8 +55,9 @@ describe('POST /internal/menu/dashboard', () => {
await call('/internal/menu/dashboard', { location: 'subreddit', targetId: 't5_testsub' })
).json();
expect(body.showForm.name).toBe('dashboardForm');
expect(body.showForm.form.description).toContain('Active rules: 0');
expect(body.showForm.form.description).toContain('Draft rules: 0');
const text = dashTexts(body);
expect(text).toContain('Active rules: 0');
expect(text).toContain('Draft rules: 0');
expect(body.showForm.form.acceptLabel).toBe('Close');
});

Expand Down Expand Up @@ -78,12 +91,13 @@ describe('POST /internal/menu/dashboard', () => {
const body = await (
await call('/internal/menu/dashboard', { location: 'subreddit', targetId: 't5_testsub' })
).json();
expect(body.showForm.form.description).toContain('Active rules: 2');
expect(body.showForm.form.description).toContain('Draft rules: 5');
expect(body.showForm.form.description).toContain('Recent actions: 2');
expect(body.showForm.form.description).toContain('modqueue (applied)');
expect(body.showForm.form.description).toContain('remove (shadow)');
expect(body.showForm.form.description).toMatch(/Tokens used \(lifetime\): 1,500 in \/ 300 out/);
const text = dashTexts(body);
expect(text).toContain('Active rules: 2');
expect(text).toContain('Draft rules: 5');
expect(text).toContain('Recent actions: 2');
expect(text).toContain('modqueue (applied)');
expect(text).toContain('remove (shadow)');
expect(text).toMatch(/1,500 in \/ 300 out/);
// Dashboard no longer triggers activation — that moved to Manage rules.
expect(body.showForm.form.acceptLabel).toBe('Close');
});
Expand Down Expand Up @@ -118,11 +132,10 @@ describe('POST /internal/menu/dashboard', () => {
const body = await (
await call('/internal/menu/dashboard', { location: 'subreddit', targetId: 't5_testsub' })
).json();
expect(body.showForm.form.description).toContain('Dry-run preview (draft rules):');
expect(body.showForm.form.description).toContain(
'r_new_account_fast_post: would match 1/10 recent post(s) → modqueue',
);
expect(body.showForm.form.description).toContain('r_wall_of_caps_comment: comment events; shadow mode it');
const text = dashTexts(body);
expect(text).toContain('Dry-run preview (draft rules)');
expect(text).toContain('r_new_account_fast_post: would match 1/10 recent post(s) → modqueue');
expect(text).toContain('r_wall_of_caps_comment: comment events; shadow mode it');
});
});

Expand Down Expand Up @@ -155,8 +168,9 @@ describe('Dashboard onboarding + empty state (Phase 1.7b Tier-3 #C, Tier-2 #A)',
const body = await (
await call('/internal/menu/dashboard', { location: 'subreddit', targetId: 't5_testsub' })
).json();
expect(body.showForm.form.description).toContain('Welcome to vibe-mod');
expect(body.showForm.form.description).toContain('3 quick steps');
const text = dashTexts(body);
expect(text).toContain('Welcome to vibe-mod');
expect(text).toContain('3 quick steps');
const fieldNames = body.showForm.form.fields.map((f: { name: string }) => f.name);
expect(fieldNames).toContain('dismissOnboarding');
});
Expand All @@ -167,8 +181,11 @@ describe('Dashboard onboarding + empty state (Phase 1.7b Tier-3 #C, Tier-2 #A)',
const body = await (
await call('/internal/menu/dashboard', { location: 'subreddit', targetId: 't5_testsub' })
).json();
expect(body.showForm.form.description).not.toContain('Welcome to vibe-mod');
expect(body.showForm.form.fields).toEqual([]);
const text = dashTexts(body);
expect(text).not.toContain('Welcome to vibe-mod');
// No `dismissOnboarding` toggle once the user has already dismissed it.
const fieldNames = body.showForm.form.fields.map((f: { name: string }) => f.name);
expect(fieldNames).not.toContain('dismissOnboarding');
});

it('emits a clear empty state when there are zero rules and zero recent actions (Tier-2 #A)', async () => {
Expand All @@ -177,7 +194,8 @@ describe('Dashboard onboarding + empty state (Phase 1.7b Tier-3 #C, Tier-2 #A)',
const body = await (
await call('/internal/menu/dashboard', { location: 'subreddit', targetId: 't5_testsub' })
).json();
expect(body.showForm.form.description).toContain('No rules yet');
expect(body.showForm.form.description).toContain('vibe-mod: Compose rule');
const text = dashTexts(body);
expect(text).toContain('No rules yet');
expect(text).toContain('vibe-mod: Compose rule');
});
});
Loading