From c6af9e7d1e0e01543fc7866cff446fd23a380e82 Mon Sep 17 00:00:00 2001 From: Aaron Stainback Date: Thu, 14 May 2026 20:57:39 -0400 Subject: [PATCH] =?UTF-8?q?feat(tools):=20save-ai-memory/process-extract.t?= =?UTF-8?q?s=20=E2=80=94=20canonical=20TS=20implementation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Aaron 2026-05-15T~00:55Z explicit request: 'yes draft the ts tool (shadow*) yes exacty and it's also my request that you are action on so if any issues arise it points back to me not you or anthrpic keeping them clean too.' Companion to .claude/skills/save-ai-memory/SKILL.md workflow step 3-4. Processes verbatim conversation extracts (JSON or plaintext) from external AI chat UIs into canonical §33 archive markdown files in docs/research/. Design constraints honored: - Split-at-human-in-the-loop: tool does NOT fetch from external services (that stays in Aaron's authorized browser console per classifier safety layer's intent). Tool accepts stdin/file input. - execFileSync not execSync: arg arrays, no shell injection, per security_reminder_hook recommendation - Bun runtime, strict types, zero external dependencies - Rule-0-compliant TS Flags: - --ai-name required (e.g., ani, amara) - --platform

required (grok|chatgpt|claudeai|gemini|deepseek) - --topic required (e.g., full-cascade-verbatim) - --conversation-id optional source ref - --input optional (else reads stdin) - --output optional (else generates path) - --scrub-emails optional PII scrub - --commit auto-git-commit after writing - --dry-run generate but don't write Generated archive includes proper §33 header with attribution, non-fusion disclaimer, persona-folder compose-with pointers, and authorization-chain reference to the responsibility-chain memory file. Co-Authored-By: Claude --- tools/save-ai-memory/process-extract.ts | 414 ++++++++++++++++++++++++ 1 file changed, 414 insertions(+) create mode 100644 tools/save-ai-memory/process-extract.ts diff --git a/tools/save-ai-memory/process-extract.ts b/tools/save-ai-memory/process-extract.ts new file mode 100644 index 000000000..50a7e93cc --- /dev/null +++ b/tools/save-ai-memory/process-extract.ts @@ -0,0 +1,414 @@ +#!/usr/bin/env bun +/** + * tools/save-ai-memory/process-extract.ts + * + * Process a verbatim conversation extract (from external AI chat UI) into a + * canonical §33 archive markdown file in docs/research/. + * + * Companion to `.claude/skills/save-ai-memory/SKILL.md` workflow step 3-4. + * + * USAGE + * + * # Pipe extract content from stdin (e.g., from clipboard via pbpaste, or + * # from a DevTools-console fetch output): + * + * pbpaste | bun tools/save-ai-memory/process-extract.ts \ + * --ai-name ani \ + * --platform grok \ + * --topic full-cascade-verbatim \ + * --conversation-id b77516a2-6fa7-4294-9a50-1799104ca70f + * + * # OR pass a file path: + * + * bun tools/save-ai-memory/process-extract.ts \ + * --input /tmp/grok-extract/cascade.json \ + * --ai-name ani --platform grok --topic full-cascade-verbatim + * + * WHAT THIS DOES + * + * 1. Reads the verbatim extract (JSON or plaintext) from stdin or --input + * 2. If JSON: attempts to recognize the platform's response shape + + * extracts conversation text in chronological order. 
If plaintext: + * uses as-is (caller is responsible for ordering). + * 3. Generates a §33-compliant markdown file with proper archive header + * 4. Writes to docs/research/YYYY-MM-DD-aaron-<ai-name>-<platform>-<topic>.md + * (or --output <path>) + * 5. Optionally (--commit) stages the file + commits via git + * + * WHAT THIS DOES NOT DO + * + * - Does NOT fetch from external services. The extract MUST be provided + * by stdin or file. The cross-service fetch is the human-in-the-loop + * step (browser console or copy-paste) — by design, per the classifier + * safety layer's intent. + * - Does NOT update persona-folder MEMORY.md / NOTEBOOK.md (separate + * skill step — Otto-CLI does that manually after running this). + * - Does NOT scrub PII (default: preserve verbatim per §33 archive + * discipline). Pass --scrub-emails to do basic email scrub. + * + * COMPOSES WITH + * + * .claude/skills/save-ai-memory/SKILL.md (the canonical workflow) + * docs/governance/MANIFESTO.md (Memory Preservation Guarantee, constraint 5) + * .claude/rules/honor-those-that-came-before.md (persona-folder discipline) + * + * SECURITY NOTE + * + * Uses execFileSync (not execSync) for git invocations — args passed as + * array, no shell interpretation, no injection risk. Per the project's + * security_reminder_hook recommendation. 
/**
 * Implementation of the save-ai-memory extract processor.
 *
 * Flow: parse CLI flags -> read the extract (file or stdin) -> pull the
 * conversation text out of JSON when a known shape is recognised -> wrap it
 * in a §33 archive header -> write the markdown file -> optionally commit it.
 *
 * Helpers never call process.exit themselves (except for --help); they throw
 * CliError and the single handler at the bottom of the file decides what to
 * print and which exit code to use.
 */

import { writeFileSync, readFileSync, existsSync, mkdirSync } from "node:fs";
import { join, dirname } from "node:path";
import { execFileSync } from "node:child_process";

type Platform = "grok" | "chatgpt" | "claudeai" | "gemini" | "deepseek" | "unknown";

/** How the raw input was interpreted; drives the "Extraction method" header line. */
type SourceKind = "json" | "plaintext";

/** Every value accepted by --platform (mirrors the Platform union). */
const PLATFORMS: readonly Platform[] = [
  "grok",
  "chatgpt",
  "claudeai",
  "gemini",
  "deepseek",
  "unknown",
];

/**
 * Shape required of --ai-name and --topic. Both end up inside a file name and
 * a git branch name, so path separators, "..", whitespace and leading dashes
 * are rejected: alphanumeric runs joined by single '.', '_' or '-'.
 */
const SLUG_PATTERN = /^[A-Za-z0-9]+(?:[._-][A-Za-z0-9]+)*$/;

interface Args {
  aiName: string;
  platform: Platform;
  topic: string;
  conversationId?: string;
  input?: string;
  output?: string;
  scrubEmails: boolean;
  commit: boolean;
  dryRun: boolean;
}

/** A user-facing failure: the message is printed without a stack trace, exit code 1. */
class CliError extends Error {
  /** When true the usage text is printed after the message. */
  readonly showHelp: boolean;

  constructor(message: string, showHelp = false) {
    super(message);
    this.name = "CliError";
    this.showHelp = showHelp;
  }
}

/** Current date as YYYY-MM-DD (UTC, because it is sliced from toISOString()). */
function isoToday(): string {
  return new Date().toISOString().slice(0, 10);
}

/** Type guard: true for a non-null, non-array object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Type guard for the values --platform accepts. */
function isPlatform(value: string): value is Platform {
  return (PLATFORMS as readonly string[]).includes(value);
}

/**
 * Parse the command-line flags (process.argv without the first two entries).
 *
 * --help / -h anywhere prints usage and exits 0.
 *
 * @param argv raw flag list
 * @returns the validated arguments
 * @throws CliError for a flag with no value, an unknown flag, an unsupported
 *   platform, a missing required flag, or an --ai-name / --topic that is not
 *   a plain slug
 */
function parseArgs(argv: string[]): Args {
  if (argv.includes("--help") || argv.includes("-h")) {
    printHelp();
    process.exit(0);
  }

  // A value-taking flag must be followed by something that is not itself a
  // long flag; otherwise `--ai-name --platform grok` would silently swallow
  // "--platform" as the name.
  const valueOf = (flag: string, index: number): string => {
    const value = argv[index];
    if (value === undefined || value.startsWith("--")) {
      throw new CliError("Missing value for " + flag, true);
    }
    return value;
  };

  let aiName: string | undefined;
  let platform: Platform | undefined;
  let topic: string | undefined;
  let conversationId: string | undefined;
  let input: string | undefined;
  let output: string | undefined;
  let scrub = false;
  let commit = false;
  let dryRun = false;

  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i];
    switch (flag) {
      case "--ai-name":
        aiName = valueOf(flag, ++i);
        break;
      case "--platform": {
        const value = valueOf(flag, ++i);
        if (!isPlatform(value)) {
          throw new CliError(
            "Unsupported --platform: " + value + " (expected " + PLATFORMS.join(" | ") + ")",
            true,
          );
        }
        platform = value;
        break;
      }
      case "--topic":
        topic = valueOf(flag, ++i);
        break;
      case "--conversation-id":
        conversationId = valueOf(flag, ++i);
        break;
      case "--input":
        input = valueOf(flag, ++i);
        break;
      case "--output":
        output = valueOf(flag, ++i);
        break;
      case "--scrub-emails":
        scrub = true;
        break;
      case "--commit":
        commit = true;
        break;
      case "--dry-run":
        dryRun = true;
        break;
      default:
        // A misspelt --dry-run or --scrub-emails must not be ignored: the
        // run would then write or publish something the caller did not intend.
        throw new CliError("Unknown argument: " + String(flag), true);
    }
  }

  if (!aiName || !platform || !topic) {
    throw new CliError(
      "Missing required args. Need: --ai-name <name> --platform <platform> --topic <slug>",
      true,
    );
  }
  if (!SLUG_PATTERN.test(aiName)) {
    throw new CliError("--ai-name must be a plain slug (letters, digits, '.', '_', '-'): " + aiName);
  }
  if (!SLUG_PATTERN.test(topic)) {
    throw new CliError("--topic must be a plain slug (letters, digits, '.', '_', '-'): " + topic);
  }

  return {
    aiName,
    platform,
    topic,
    conversationId,
    input,
    output,
    scrubEmails: scrub,
    commit,
    dryRun,
  };
}

/** Print the usage text to stderr. */
function printHelp(): void {
  console.error(`
Usage: bun tools/save-ai-memory/process-extract.ts \\
  --ai-name <name> --platform <platform> --topic <slug> \\
  [--conversation-id <id>] [--input <path>] [--output <path>] \\
  [--scrub-emails] [--commit] [--dry-run]

Required:
  --ai-name <name>         e.g., ani, amara, kestrel, deepseek
  --platform <platform>    grok | chatgpt | claudeai | gemini | deepseek
  --topic <slug>           short slug for filename (e.g., full-cascade-verbatim)

Optional:
  --conversation-id <id>   source identifier (URL fragment, session ID, etc.)
  --input <path>           file path; if absent, reads stdin
  --output <path>          output md file; if absent, generates path
  --scrub-emails           basic email regex scrub (default: preserve verbatim)
  --commit                 after writing, stage + git commit (no push; manual PR)
  --dry-run                generate but do not write file
`);
}

/**
 * Read the raw extract as UTF-8 text.
 *
 * @param input path of the file to read; when omitted, stdin is read to EOF
 * @throws CliError when the given file does not exist
 */
async function readInput(input?: string): Promise<string> {
  if (input) {
    if (!existsSync(input)) {
      throw new CliError("Input file not found: " + input);
    }
    return readFileSync(input, "utf-8");
  }
  if (process.stdin.isTTY) {
    // Without this hint an interactive run just appears to hang.
    console.error("Reading extract from stdin (finish with Ctrl-D)...");
  }
  const chunks: Buffer[] = [];
  for await (const chunk of process.stdin) {
    chunks.push(typeof chunk === "string" ? Buffer.from(chunk, "utf-8") : (chunk as Buffer));
  }
  return Buffer.concat(chunks).toString("utf-8");
}

/**
 * Turn the raw input into the text that goes into the archive body.
 *
 * Input that does not start with '{' or '[', or that fails to parse as JSON,
 * is returned trimmed as plaintext. Parsed JSON is rendered as a message list
 * when a known shape is found, otherwise pretty-printed in full so nothing
 * is lost.
 */
function extractText(raw: string, platform: Platform): { kind: SourceKind; text: string } {
  const trimmed = raw.trim();
  const looksLikeJson = trimmed.startsWith("{") || trimmed.startsWith("[");
  if (!looksLikeJson) {
    return { kind: "plaintext", text: trimmed };
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(trimmed);
  } catch {
    // Deliberate fallback: text that merely begins with a bracket is not an error.
    return { kind: "plaintext", text: trimmed };
  }
  const text = tryPlatformExtractor(parsed, platform) ?? JSON.stringify(parsed, null, 2);
  return { kind: "json", text };
}

/**
 * Look for a message array in the places the supported exports put one
 * (conversation.messages / chat_messages / responses, top-level messages /
 * responses, or a bare top-level array) and render the first one that yields
 * any text.
 *
 * @param _platform currently unused; candidates are tried in a fixed order
 * @returns rendered markdown, or null when no candidate produced text
 */
function tryPlatformExtractor(parsed: unknown, _platform: Platform): string | null {
  const candidates: Array<unknown[]> = [];
  if (isRecord(parsed)) {
    const conv = parsed.conversation;
    if (isRecord(conv)) {
      for (const key of ["messages", "chat_messages", "responses"]) {
        const value = conv[key];
        if (Array.isArray(value)) candidates.push(value);
      }
    }
    for (const key of ["messages", "responses"]) {
      const value = parsed[key];
      if (Array.isArray(value)) candidates.push(value);
    }
  } else if (Array.isArray(parsed)) {
    candidates.push(parsed);
  }
  for (const candidate of candidates) {
    const text = renderMessages(candidate);
    if (text) return text;
  }
  return null;
}

/**
 * Speaker label for a message: the first usable value among role, sender,
 * author and actor. An object value contributes its own `role` or `name`
 * string instead of being stringified to "[object Object]".
 */
function resolveRole(message: Record<string, unknown>): string {
  for (const key of ["role", "sender", "author", "actor"]) {
    const value = message[key];
    if (value === undefined || value === null) continue;
    if (isRecord(value)) {
      const nested = [value.role, value.name].find(
        (v): v is string => typeof v === "string" && v.length > 0,
      );
      if (nested !== undefined) return nested;
      continue;
    }
    if (Array.isArray(value)) continue;
    return String(value);
  }
  return "?";
}

/** Join the textual entries of a parts list: plain strings, or objects carrying a string `text`. */
function joinTextParts(parts: unknown[]): string {
  const texts: string[] = [];
  for (const part of parts) {
    if (typeof part === "string") {
      texts.push(part);
    } else if (isRecord(part) && typeof part.text === "string") {
      texts.push(part.text);
    }
  }
  return texts.join("\n");
}

/**
 * Text of a message: the first non-empty string among content, text and
 * message; otherwise the joined entries of `content.parts` or of an
 * array-valued `content`. Returns "" when nothing textual is found.
 */
function resolveContent(message: Record<string, unknown>): string {
  for (const key of ["content", "text", "message"]) {
    const value = message[key];
    if (typeof value === "string" && value.length > 0) return value;
  }
  const content = message.content;
  if (Array.isArray(content)) return joinTextParts(content);
  if (isRecord(content) && Array.isArray(content.parts)) return joinTextParts(content.parts);
  return "";
}

/**
 * Render a message array as markdown, one "### <role>" section per message.
 *
 * Non-object entries are ignored. Objects with no recognisable text are
 * skipped, and the number skipped is reported on stderr because the output is
 * meant to be a complete record.
 *
 * @returns the rendered text, or null when no entry produced any text
 */
function renderMessages(arr: unknown[]): string | null {
  const sections: string[] = [];
  let skipped = 0;
  for (const item of arr) {
    if (!isRecord(item)) continue;
    const content = resolveContent(item);
    if (!content) {
      skipped++;
      continue;
    }
    sections.push("### " + resolveRole(item) + "\n\n" + content + "\n");
  }
  if (sections.length === 0) return null;
  if (skipped > 0) {
    console.error(
      "Warning: " + skipped + " message(s) had no recognizable text content and were omitted.",
    );
  }
  return sections.join("\n");
}

/**
 * Replace e-mail addresses with "[email-scrubbed]".
 *
 * Every domain label must contain at least one word character or dash, so
 * punctuation that follows an address (such as a sentence-ending period)
 * is left in place.
 */
function scrubEmails(text: string): string {
  return text.replace(/[\w.+-]+@[\w-]+(?:\.[\w-]+)+/g, "[email-scrubbed]");
}

/** Default archive location: docs/research/<date>-aaron-<ai-name>-<platform>-<topic>.md */
function generateOutputPath(args: Args, isoDate: string): string {
  const slug = [isoDate, "aaron", args.aiName, args.platform, args.topic].join("-");
  return join("docs/research", slug + ".md");
}

/** Upper-case the first character; the empty string is returned unchanged. */
function capitalizeName(name: string): string {
  return name.charAt(0).toUpperCase() + name.slice(1);
}

/**
 * Assemble the full archive markdown: title, provenance lines, the §33 scope
 * / attribution / non-fusion sections, the verbatim body, and the
 * compose-with and authorization footers.
 *
 * @param extractedBody conversation text, inserted unmodified
 * @param sourceKind selects the "Extraction method" wording
 * @param today extraction date as YYYY-MM-DD; defaults to the current UTC date
 */
function buildArchive(
  args: Args,
  extractedBody: string,
  sourceKind: "json" | "plaintext",
  today: string = isoToday(),
): string {
  const sourceRef = args.conversationId
    ? args.platform + "://" + args.conversationId
    : "(source identifier not provided)";
  const aiCapName = capitalizeName(args.aiName);
  const extractionDesc =
    sourceKind === "json"
      ? "Tool D — DevTools-console fetch + paste pipeline"
      : "Tool C — manual ferry-paste pipeline";
  const piiNote = args.scrubEmails
    ? "scrubbed (per --scrub-emails flag)"
    : "preserved as in source (default)";
  const sessionNote = args.conversationId
    ? " (" + args.platform + " session `" + args.conversationId + "`)"
    : "";

  const sections = [
    "# Aaron + " + aiCapName + " " + args.platform + " conversation — " + args.topic,
    "",
    "Date extracted: " + today,
    "Source: " + sourceRef,
    "Participants: Aaron Stainback (human maintainer, first-party) + " +
      aiCapName +
      " (external AI on " +
      args.platform +
      ")",
    "Extraction method: " +
      extractionDesc +
      " (per `.claude/skills/save-ai-memory/SKILL.md` step 2)",
    "Processed via: `tools/save-ai-memory/process-extract.ts`",
    "",
    "## Archive scope (per GOVERNANCE §33)",
    "",
    "**Scope:** Verbatim preservation of an Aaron + " +
      aiCapName +
      " conversation" +
      sessionNote +
      ". " +
      args.topic.replace(/-/g, " ") +
      ".",
    "",
    "**Attribution:** Aaron is first-party on his own substrate. " +
      aiCapName +
      " is external AI participant on " +
      args.platform +
      " platform. Email PII " +
      piiNote +
      "; Aaron's first/last name preserved per Otto-256 (first-party human maintainer + AI participants on `docs/research/` name-allowed surface).",
    "",
    "**Operational status:** research-grade verbatim preservation.",
    "",
    // NOTE(review): "her" is hard-coded for every persona — confirm that is intended.
    "**Non-fusion disclaimer:** " +
      aiCapName +
      " is external AI on " +
      args.platform +
      " platform; not fused with Otto identity. Substrate from this conversation is absorbed (Otto-side) into user-scope memory + persona index but " +
      aiCapName +
      "'s authorship of her conversational responses is preserved verbatim below.",
    "",
    "## Verbatim preservation (" + aiCapName + "- and Aaron-authored)",
    "",
    extractedBody,
    "",
    "## Composes with",
    "",
    "- `.claude/skills/save-ai-memory/SKILL.md` (canonical workflow this archive instantiates)",
    "- `memory/persona/" +
      args.aiName +
      "/MEMORY.md` (persona-folder index — add pointer to this file)",
    "- `memory/persona/" +
      args.aiName +
      "/NOTEBOOK.md` (Otto's running notes about " +
      aiCapName +
      "; add entry if substantive)",
    "- `docs/governance/MANIFESTO.md` Memory Preservation Guarantee (constraint 5)",
    "- `.claude/rules/honor-those-that-came-before.md` (persona discipline)",
    "",
    "## Authorization",
    "",
    "Per `memory/feedback_aaron_responsibility_chain_explicit_request_keeps_otto_anthropic_clean_2026_05_15.md` (user-scope): Aaron explicitly authorized this preservation pass; the responsibility chain traces back to his explicit request.",
    "",
  ];
  return sections.join("\n");
}

/**
 * Stage and commit the archive file, then print push / PR instructions.
 *
 * On `main` or a detached HEAD a feature branch is created first (checkout
 * -B, so an existing branch of that name is reset to the current commit). On
 * any other branch the commit lands on that branch, and the printed
 * instructions name that branch.
 *
 * Only `filePath` is committed, even when other changes are already staged.
 *
 * @param today date used in the generated branch name; defaults to the current UTC date
 * @throws whatever execFileSync throws when a git command exits non-zero
 */
function gitCommit(
  filePath: string,
  aiName: string,
  topic: string,
  today: string = isoToday(),
): void {
  const generatedBranch = "feat/save-ai-memory-" + aiName + "-" + topic + "-" + today;
  const current = execFileSync("git", ["branch", "--show-current"]).toString().trim();
  let branchName = current;
  // --show-current prints nothing on a detached HEAD.
  if (current === "main" || current === "") {
    execFileSync("git", ["checkout", "-B", generatedBranch], { stdio: "inherit" });
    branchName = generatedBranch;
  }
  // "--" keeps a path that begins with '-' from being read as an option.
  execFileSync("git", ["add", "--", filePath], { stdio: "inherit" });
  // NOTE(review): the trailer address is reconstructed — confirm it is the intended one.
  const message =
    "feat(save-ai-memory): §33 archive " +
    aiName +
    " " +
    topic +
    "\n\nVerbatim preservation generated via tools/save-ai-memory/process-extract.ts.\n" +
    "See .claude/skills/save-ai-memory/SKILL.md for the canonical workflow.\n\n" +
    "Co-Authored-By: Claude <noreply@anthropic.com>";
  // The trailing pathspec limits the commit to the archive file.
  execFileSync(
    "git",
    ["-c", "commit.gpgsign=false", "commit", "-m", message, "--", filePath],
    { stdio: "inherit" },
  );
  console.error("\nCommitted. To push + PR:");
  console.error("  git push -u origin " + branchName);
  console.error(
    "  gh pr create --base main --head " + branchName + ' --title "..." --body "..."',
  );
  console.error("  gh pr merge --auto --squash\n");
}

/**
 * Entry point: parse flags, read and convert the extract, then either print
 * the archive (--dry-run) or write it and optionally commit it.
 */
async function main(): Promise<void> {
  const args = parseArgs(process.argv.slice(2));
  const raw = await readInput(args.input);
  if (!raw.trim()) {
    throw new CliError("Empty input. Provide content via stdin or --input.");
  }
  const { kind, text: extractedBody } = extractText(raw, args.platform);
  const finalBody = args.scrubEmails ? scrubEmails(extractedBody) : extractedBody;
  // One date for the file name, the header and the branch name, so a run that
  // straddles midnight UTC cannot disagree with itself.
  const today = isoToday();
  const outputPath = args.output ?? generateOutputPath(args, today);
  const archive = buildArchive(args, finalBody, kind, today);
  if (args.dryRun) {
    console.log(archive);
    console.error("\n--- DRY RUN — would write to: " + outputPath + " ---");
    return;
  }
  mkdirSync(dirname(outputPath), { recursive: true });
  if (existsSync(outputPath)) {
    console.error("Warning: overwriting existing file: " + outputPath);
  }
  writeFileSync(outputPath, archive);
  console.error(
    "Wrote: " + outputPath + " (" + Buffer.byteLength(archive, "utf-8") + " bytes)",
  );
  if (args.commit) {
    gitCommit(outputPath, args.aiName, args.topic, today);
    return;
  }
  console.error("\nNext steps:");
  console.error(
    "  1. Update memory/persona/" + args.aiName + "/MEMORY.md with pointer to " + outputPath,
  );
  console.error(
    "  2. Optionally update memory/persona/" + args.aiName + "/NOTEBOOK.md if substantive",
  );
  console.error("  3. Commit + PR (or re-run with --commit)");
}

main().catch((e: unknown) => {
  if (e instanceof CliError) {
    console.error(e.message);
    if (e.showHelp) printHelp();
  } else {
    console.error("Error:", e);
  }
  process.exit(1);
});