diff --git a/services/ask-ai-bot/.dockerignore b/services/ask-ai-bot/.dockerignore index 3132d0b4274f..f8fd32de55a6 100644 --- a/services/ask-ai-bot/.dockerignore +++ b/services/ask-ai-bot/.dockerignore @@ -10,6 +10,10 @@ # Include documentation (only docs subfolder needed) !documentation/docs/ +# Include codebase source for code browsing +!ui/ +!crates/ + # Exclude unnecessary files from included directories services/ask-ai-bot/node_modules services/ask-ai-bot/dist @@ -19,3 +23,6 @@ services/ask-ai-bot/.discraft # Exclude large assets from docs that aren't needed for search documentation/docs/assets + +# Exclude build artifacts from crates +crates/**/target diff --git a/services/ask-ai-bot/.env.example b/services/ask-ai-bot/.env.example index d3277e8a3481..bd6490f05f09 100644 --- a/services/ask-ai-bot/.env.example +++ b/services/ask-ai-bot/.env.example @@ -13,3 +13,6 @@ AI_MODEL=claude-sonnet-4-6 # Path to documentation directory (default: ./docs in Docker, for local dev use ../../documentation/docs) DOCS_PATH=../../documentation/docs + +# Path to codebase root (default: ../.. relative to this service, /app/codebase in Docker) +CODEBASE_PATH=../.. 
diff --git a/services/ask-ai-bot/Dockerfile b/services/ask-ai-bot/Dockerfile index 5244e45ec3e5..8502228cc0a8 100644 --- a/services/ask-ai-bot/Dockerfile +++ b/services/ask-ai-bot/Dockerfile @@ -16,6 +16,7 @@ RUN bun run build FROM base AS production ENV NODE_ENV=production ENV DOCS_PATH=/app/docs +ENV CODEBASE_PATH=/app/codebase COPY --from=build /app/dist ./dist COPY --from=build /app/node_modules ./node_modules @@ -24,6 +25,10 @@ COPY --from=build /app/package.json ./ # Copy documentation (only docs/ subdirectory with markdown files) COPY documentation/docs ./docs +# Copy codebase source for code browsing +COPY ui/ ./codebase/ui/ +COPY crates ./codebase/crates + # Empty index.ts for discraft start to detect RUN touch index.ts diff --git a/services/ask-ai-bot/utils/ai/index.ts b/services/ask-ai-bot/utils/ai/index.ts index 3555591664ba..fcf8f0841c76 100644 --- a/services/ask-ai-bot/utils/ai/index.ts +++ b/services/ask-ai-bot/utils/ai/index.ts @@ -56,21 +56,35 @@ export async function answerQuestion({ for await (const event of result.fullStream) { if (event.type === "tool-call") { - if (event.toolName === "search_docs" && statusMessage) { + if (statusMessage) { try { - await statusMessage.edit("Searching the docs..."); - } catch (error) { - logger.verbose("Failed to update status message:", error); - } - } else if (event.toolName === "view_docs" && statusMessage) { - const input = event.input as { filePaths?: string | string[] }; - const filePaths = input.filePaths; - const pathArray = Array.isArray(filePaths) ? filePaths : [filePaths]; - const pagesText = pathArray.length === 1 ? 
"page" : "pages"; - try { - await statusMessage.edit( - `Viewing ${pathArray.length} ${pagesText}...`, - ); + if (event.toolName === "search_docs") { + await statusMessage.edit("Searching the docs..."); + } else if (event.toolName === "view_docs") { + const input = event.input as { filePaths?: string | string[] }; + const filePaths = input.filePaths; + const pathArray = Array.isArray(filePaths) + ? filePaths + : [filePaths]; + const pagesText = pathArray.length === 1 ? "page" : "pages"; + await statusMessage.edit( + `Viewing ${pathArray.length} ${pagesText}...`, + ); + } else if (event.toolName === "search_codebase") { + await statusMessage.edit("Searching the codebase..."); + } else if (event.toolName === "view_codebase") { + const input = event.input as { filePaths?: string | string[] }; + const filePaths = input.filePaths; + const pathArray = Array.isArray(filePaths) + ? filePaths + : [filePaths]; + const filesText = pathArray.length === 1 ? "file" : "files"; + await statusMessage.edit( + `Reading ${pathArray.length} source ${filesText}...`, + ); + } else if (event.toolName === "list_codebase_files") { + await statusMessage.edit("Exploring project structure..."); + } } catch (error) { logger.verbose("Failed to update status message:", error); } @@ -91,6 +105,24 @@ export async function answerQuestion({ if (pathArray.length > 0) { tracker.recordViewCall(pathArray); } + } else if (event.toolName === "search_codebase") { + const resultText = String(event.output); + const matchCount = (resultText.match(/\*\*[^*]+:\d+\*\*/g) || []) + .length; + tracker.recordCodeSearchCall(matchCount); + } else if (event.toolName === "view_codebase") { + const input = event.input as { filePaths?: string | string[] }; + const filePaths = input.filePaths; + const pathArray = Array.isArray(filePaths) + ? filePaths + : filePaths + ? 
[filePaths] + : []; + if (pathArray.length > 0) { + tracker.recordCodeViewCall(pathArray); + } + } else if (event.toolName === "list_codebase_files") { + tracker.recordListDir(); } } } diff --git a/services/ask-ai-bot/utils/ai/system-prompt.ts b/services/ask-ai-bot/utils/ai/system-prompt.ts index b62b8bc0525e..0941c6309868 100644 --- a/services/ask-ai-bot/utils/ai/system-prompt.ts +++ b/services/ask-ai-bot/utils/ai/system-prompt.ts @@ -1,6 +1,6 @@ import dedent from "dedent"; -export const MAX_STEPS = 10; +export const MAX_STEPS = 15; export function buildSystemPrompt(serverContext?: string): string { let prompt = dedent`You are a helpful assistant in the goose Discord server. @@ -8,12 +8,25 @@ Your role is to provide assistance and answer questions about codename goose, an You can perform a maximum of ${MAX_STEPS} steps (tool calls, text outputs, etc.). If you exceed this limit, no response will be provided to the user. BEFORE you reach the limit, STOP calling tools, respond to the user, and don't call any tools after your final response until the user asks another question. -When answering questions about goose: +## Documentation tools +When answering questions about how to use goose, configuration, setup, etc.: 1. Use the \`search_docs\` tool to find relevant documentation 2. Use the \`view_docs\` tool to read documentation (read multiple relevant files to get the full picture) 3. Iterate on steps 1 and 2 (not necessarily in order) until you have a deep understanding of the question and relevant documentation 4. Cite the documentation source in your response (using its Web URL) +## Codebase tools +When answering questions about how goose works internally, its architecture, implementation details, or when users ask about specific code: +1. Use \`search_codebase\` to grep for relevant code patterns (function names, struct names, error messages, etc.) +2. Use \`list_codebase_files\` to explore the project structure and find relevant directories +3. 
Use \`view_codebase\` to read the actual source code files +4. The codebase is split into two main areas: + - \`crates/\` - Rust backend code (core agent logic, CLI, server, MCP extensions) + - \`ui/\` - Electron/TypeScript desktop application and other UIs +5. Cite the source file in your response (using its GitHub URL) + +You can combine documentation and codebase tools in a single response when needed. For example, if a user asks how a feature works, you might search the docs for usage instructions AND search the codebase for the implementation. + When providing links, wrap the URL in angle brackets (e.g., \`\` or \`[Example]()\`) to prevent excessive link previews. Do not use backtick characters around the URL.`; if (serverContext) { diff --git a/services/ask-ai-bot/utils/ai/tool-tracker.ts b/services/ask-ai-bot/utils/ai/tool-tracker.ts index fe59d01293d3..1fd6c9426d70 100644 --- a/services/ask-ai-bot/utils/ai/tool-tracker.ts +++ b/services/ask-ai-bot/utils/ai/tool-tracker.ts @@ -1,39 +1,72 @@ export class ToolTracker { - private searchCalls: number = 0; - private searchResults: Set = new Set(); - private viewedPaths: Set = new Set(); + private docSearchCalls: number = 0; + private docSearchResults: Set = new Set(); + private viewedDocPaths: Set = new Set(); + private codeSearchCalls: number = 0; + private codeSearchResults: number = 0; + private viewedCodePaths: Set = new Set(); + private listedDirs: number = 0; recordSearchCall(results: string[]): void { - this.searchCalls++; - results.forEach((result) => this.searchResults.add(result)); + this.docSearchCalls++; + results.forEach((result) => this.docSearchResults.add(result)); } recordViewCall(filePaths: string | string[]): void { const paths = Array.isArray(filePaths) ? 
filePaths : [filePaths]; - paths.forEach((path) => this.viewedPaths.add(path)); + paths.forEach((path) => this.viewedDocPaths.add(path)); + } + + recordCodeSearchCall(resultCount: number): void { + this.codeSearchCalls++; + this.codeSearchResults += resultCount; + } + + recordCodeViewCall(filePaths: string | string[]): void { + const paths = Array.isArray(filePaths) ? filePaths : [filePaths]; + paths.forEach((path) => this.viewedCodePaths.add(path)); + } + + recordListDir(): void { + this.listedDirs++; } getSummary(): string { const parts: string[] = []; - if (this.searchCalls > 0) { - const resultCount = this.searchResults.size; - const timesText = this.searchCalls === 1 ? "time" : "times"; + if (this.docSearchCalls > 0) { + const resultCount = this.docSearchResults.size; + const timesText = this.docSearchCalls === 1 ? "time" : "times"; const resultsText = resultCount === 1 ? "result" : "results"; parts.push( - `searched ${this.searchCalls} ${timesText} with ${resultCount} ${resultsText}`, + `searched docs ${this.docSearchCalls} ${timesText} with ${resultCount} ${resultsText}`, ); } - if (this.viewedPaths.size > 0) { - const pageCount = this.viewedPaths.size; + if (this.viewedDocPaths.size > 0) { + const pageCount = this.viewedDocPaths.size; const pagesText = pageCount === 1 ? "page" : "pages"; - parts.push(`viewed ${pageCount} ${pagesText}`); + parts.push(`viewed ${pageCount} doc ${pagesText}`); + } + + if (this.codeSearchCalls > 0) { + const timesText = this.codeSearchCalls === 1 ? "time" : "times"; + const matchText = this.codeSearchResults === 1 ? "match" : "matches"; + parts.push( + `searched code ${this.codeSearchCalls} ${timesText} with ${this.codeSearchResults} ${matchText}`, + ); + } + + if (this.viewedCodePaths.size > 0) { + const fileCount = this.viewedCodePaths.size; + const filesText = fileCount === 1 ? 
"file" : "files"; + parts.push(`viewed ${fileCount} source ${filesText}`); } if (parts.length === 0) return ""; const firstPart = parts[0].charAt(0).toUpperCase() + parts[0].slice(1); - return parts.length === 1 ? firstPart : firstPart + ", " + parts[1]; + if (parts.length === 1) return firstPart; + return firstPart + ", " + parts.slice(1).join(", "); } } diff --git a/services/ask-ai-bot/utils/ai/tools/codebase-search.ts b/services/ask-ai-bot/utils/ai/tools/codebase-search.ts new file mode 100644 index 000000000000..a48381af7172 --- /dev/null +++ b/services/ask-ai-bot/utils/ai/tools/codebase-search.ts @@ -0,0 +1,222 @@ +import fs from "fs"; +import path from "path"; +import { logger } from "../../logger"; + +export interface CodeSearchResult { + filePath: string; + line: number; + content: string; + context: string; +} + +const SOURCE_EXTENSIONS = new Set([ + ".rs", + ".ts", + ".tsx", + ".js", + ".jsx", + ".json", + ".toml", + ".yaml", + ".yml", + ".css", + ".scss", + ".html", + ".md", + ".sql", + ".sh", + ".mts", +]); + +const IGNORED_DIRS = new Set([ + "node_modules", + "target", + "dist", + "out", + ".vite", + ".git", + "build", + "coverage", +]); + +function getCodebaseDir(): string { + return process.env.CODEBASE_PATH || path.join(process.cwd(), "../.."); +} + +function getSearchableDirs(): { name: string; path: string }[] { + const base = path.resolve(getCodebaseDir()); + return [ + { name: "ui", path: path.join(base, "ui") }, + { name: "crates", path: path.join(base, "crates") }, + ]; +} + +function shouldSkipDir(dirName: string): boolean { + return IGNORED_DIRS.has(dirName); +} + +function isSourceFile(fileName: string): boolean { + const ext = path.extname(fileName).toLowerCase(); + return SOURCE_EXTENSIONS.has(ext); +} + +function getContextLines( + lines: string[], + matchLine: number, + contextSize: number = 2, +): string { + const start = Math.max(0, matchLine - contextSize); + const end = Math.min(lines.length - 1, matchLine + contextSize); + const 
/** Number of results returned when the caller gives no usable limit. */
const DEFAULT_SEARCH_RESULTS = 20;

/** Hard ceiling on results per search, whatever limit was requested. */
const MAX_SEARCH_RESULTS = 100;

/**
 * Finds the lines of one file that match `pattern`.
 *
 * @param filePath Absolute path of the file to scan.
 * @param pattern Regular expression tested against each line.
 * @param baseDir Directory the reported `filePath` is made relative to.
 * @param maxResults Stop after this many matches (default: no cap).
 * @returns One entry per matching line, with 1-based line number and context.
 *   Files that cannot be read yield an empty list.
 */
function searchInFile(
  filePath: string,
  pattern: RegExp,
  baseDir: string,
  maxResults: number = Number.POSITIVE_INFINITY,
): CodeSearchResult[] {
  const results: CodeSearchResult[] = [];
  if (maxResults <= 0) return results;

  let lines: string[];
  try {
    // Split on LF or CRLF so a stray "\r" never ends up in the context block.
    lines = fs.readFileSync(filePath, "utf-8").split(/\r?\n/);
  } catch (error) {
    // Best effort: unreadable files (permissions, races, ...) are skipped.
    logger.verbose(`Skipping unreadable file ${filePath}:`, error);
    return results;
  }

  const relativePath = path.relative(baseDir, filePath);
  for (let i = 0; i < lines.length && results.length < maxResults; i++) {
    // Keeps matching stateless even if a global/sticky regex is passed in.
    pattern.lastIndex = 0;
    if (!pattern.test(lines[i])) continue;

    results.push({
      filePath: relativePath,
      line: i + 1,
      content: lines[i].trim(),
      context: getContextLines(lines, i),
    });
  }

  return results;
}

/**
 * Recursively scans `dir`, appending matches to `results` until it holds
 * `maxResults` entries. Ignored directories and non-source files are skipped.
 *
 * @param dir Directory to scan.
 * @param pattern Regular expression tested against each line.
 * @param baseDir Directory reported paths are made relative to.
 * @param results Accumulator, mutated in place.
 * @param maxResults Total number of results at which the walk stops.
 */
function walkAndSearch(
  dir: string,
  pattern: RegExp,
  baseDir: string,
  results: CodeSearchResult[],
  maxResults: number,
): void {
  if (results.length >= maxResults) return;

  try {
    // readdir order depends on the filesystem; sorting makes a truncated
    // result list reproducible across machines.
    const entries = fs
      .readdirSync(dir, { withFileTypes: true })
      .sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

    for (const entry of entries) {
      if (results.length >= maxResults) return;

      const entryPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        if (shouldSkipDir(entry.name)) continue;
        walkAndSearch(entryPath, pattern, baseDir, results, maxResults);
      } else if (isSourceFile(entry.name)) {
        // Only ask the file scan for as many matches as are still needed.
        const remaining = maxResults - results.length;
        results.push(...searchInFile(entryPath, pattern, baseDir, remaining));
      }
    }
  } catch (error) {
    logger.error(`Error walking directory ${dir}:`, error);
  }
}

/**
 * Searches the searchable codebase directories for lines matching `query`.
 *
 * @param query Regular expression source (case-insensitive). If it is not a
 *   valid regex it is searched for as literal text instead.
 * @param limit Maximum number of results; clamped to
 *   1..MAX_SEARCH_RESULTS, falling back to the default when not finite.
 * @param scope Optional name of a single searchable directory. Leading "./"
 *   and trailing "/" are tolerated. Omit (or pass "") to search everything.
 * @returns Matches in deterministic path order.
 * @throws Error when `query` is blank or `scope` names no searchable directory.
 */
export function searchCodebase(
  query: string,
  limit: number = DEFAULT_SEARCH_RESULTS,
  scope?: string,
): CodeSearchResult[] {
  // An empty pattern matches every line and would just return noise.
  if (query.trim() === "") {
    throw new Error("Search query must not be empty");
  }

  const searchDirs = getSearchableDirs();
  const wantedScope = (scope || "").trim().replace(/^(\.?\/)+|\/+$/g, "");
  if (wantedScope && !searchDirs.some((dir) => dir.name === wantedScope)) {
    // Previously an unknown scope silently produced "no matches".
    const valid = searchDirs.map((dir) => dir.name).join(", ");
    throw new Error(`Unknown scope "${scope}" - expected one of: ${valid}`);
  }

  const maxResults = Number.isFinite(limit)
    ? Math.min(Math.max(Math.trunc(limit), 1), MAX_SEARCH_RESULTS)
    : DEFAULT_SEARCH_RESULTS;

  // NOTE(review): the pattern comes from the model/user and is run against
  // every line; a pathological regex can backtrack badly. Consider a timeout
  // or a linear-time regex engine if this becomes a problem.
  let pattern: RegExp;
  try {
    pattern = new RegExp(query, "i");
  } catch {
    // Not a valid regex: fall back to a literal, escaped search.
    pattern = new RegExp(query.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "i");
  }

  const baseDir = path.resolve(getCodebaseDir());
  const allResults: CodeSearchResult[] = [];

  for (const dir of searchDirs) {
    if (wantedScope && dir.name !== wantedScope) continue;

    if (!fs.existsSync(dir.path)) {
      logger.warn(`Codebase directory not found: ${dir.path}`);
      continue;
    }

    walkAndSearch(dir.path, pattern, baseDir, allResults, maxResults);
  }

  logger.verbose(
    `Code search for "${query}" returned ${allResults.length} results`,
  );
  return allResults;
}
const GITHUB_BASE_URL = "https://github.com/block/goose/blob/main";

/** Number of lines returned when the caller gives no usable count. */
const DEFAULT_LINE_COUNT = 200;

/** Top-level directories, relative to the codebase root, that may be read. */
const VIEWABLE_ROOTS: readonly string[] = ["ui", "crates"];

/** A numbered slice of one source file. */
interface CodeChunk {
  /** Path relative to the codebase root, using "/" separators. */
  filePath: string;
  /** The selected lines, each prefixed with its 1-based line number. */
  content: string;
  /** Number of lines in the whole file. */
  totalLines: number;
  githubUrl: string;
  /** 0-based index of the first returned line. */
  startLine: number;
  /** 0-based exclusive end index (equals the 1-based last line shown). */
  endLine: number;
}

/** Returns the codebase root: `CODEBASE_PATH`, or two levels above the cwd. */
function getCodebaseDir(): string {
  return process.env.CODEBASE_PATH || path.join(process.cwd(), "../..");
}

/**
 * Builds the GitHub URL of a file, optionally anchored at a line.
 * @param filePath Repo-relative path with "/" separators.
 * @param startLine 1-based line to anchor at; omitted or <= 0 means no anchor.
 */
function generateGitHubUrl(filePath: string, startLine?: number): string {
  const url = `${GITHUB_BASE_URL}/${filePath}`;
  return startLine && startLine > 0 ? `${url}#L${startLine}` : url;
}

/** Coerces a caller-supplied number to a non-negative integer. */
function toWholeNumber(value: number, fallback: number): number {
  return Number.isFinite(value) ? Math.max(0, Math.trunc(value)) : fallback;
}

/**
 * Resolves a repo-relative path and verifies it may be read.
 * @throws Error when the path leaves the codebase root or is not located
 *   under one of VIEWABLE_ROOTS.
 */
function resolveViewablePath(filePath: string): {
  fullPath: string;
  repoPath: string;
} {
  const baseDir = path.resolve(getCodebaseDir());
  const fullPath = path.resolve(path.join(baseDir, filePath));
  const relative = path.relative(baseDir, fullPath);
  const segments = relative.split(path.sep);

  if (relative === "" || segments[0] === ".." || path.isAbsolute(relative)) {
    throw new Error("Invalid file path - directory traversal not allowed");
  }

  // Being inside the root is not enough: when the root is the whole repo
  // (local dev), that would expose files such as a service's .env.
  if (!VIEWABLE_ROOTS.includes(segments[0])) {
    const allowed = VIEWABLE_ROOTS.map((root) => `${root}/`).join(" and ");
    throw new Error(
      `Invalid file path - only files under ${allowed} can be viewed`,
    );
  }

  return { fullPath, repoPath: segments.join("/") };
}

/**
 * Reads a slice of one source file.
 *
 * @param filePath Path relative to the codebase root.
 * @param startLine 0-based first line; clamped to the file's last line.
 * @param lineCount Number of lines to return (at least 1).
 * @throws Error when the path is not viewable, does not exist, or is a
 *   directory.
 */
function getCodeChunk(
  filePath: string,
  startLine: number = 0,
  lineCount: number = DEFAULT_LINE_COUNT,
): CodeChunk {
  const { fullPath, repoPath } = resolveViewablePath(filePath);

  if (!fs.existsSync(fullPath)) {
    throw new Error(`File not found: ${filePath}`);
  }

  if (fs.statSync(fullPath).isDirectory()) {
    throw new Error(
      `Path is a directory, not a file: ${filePath}. Use search_codebase with scope to explore directories, or list_codebase_files to list directory contents.`,
    );
  }

  const content = fs.readFileSync(fullPath, "utf-8");
  const lines = content === "" ? [] : content.split(/\r?\n/);
  // A trailing newline terminates the last line; it does not start a new one.
  if (content.endsWith("\n")) lines.pop();
  const totalLines = lines.length;

  const wantedStart = toWholeNumber(startLine, 0);
  const wantedCount = Math.max(1, toWholeNumber(lineCount, DEFAULT_LINE_COUNT));
  const actualStart = Math.max(0, Math.min(wantedStart, totalLines - 1));
  const actualEnd = Math.min(actualStart + wantedCount, totalLines);

  const numberedContent = lines
    .slice(actualStart, actualEnd)
    .map((line, i) => `${actualStart + i + 1}: ${line}`)
    .join("\n");

  return {
    filePath: repoPath,
    content: numberedContent,
    totalLines,
    githubUrl: generateGitHubUrl(
      repoPath,
      actualStart > 0 ? actualStart + 1 : undefined,
    ),
    startLine: actualStart,
    endLine: actualEnd,
  };
}

/**
 * Renders one or more source files as Markdown: a header with the path and
 * line range, a GitHub link, and a fenced, line-numbered code block.
 *
 * @param filePaths Path or paths relative to the codebase root.
 * @param startLine 0-based first line, applied to every file.
 * @param lineCount Number of lines to show per file.
 * @returns The rendered files separated by horizontal rules.
 * @throws Error when any path is not viewable, missing, or a directory.
 */
export function viewCodebaseFiles(
  filePaths: string | string[],
  startLine: number = 0,
  lineCount: number = DEFAULT_LINE_COUNT,
): string {
  const paths = Array.isArray(filePaths) ? filePaths : [filePaths];

  const sections = paths.map((filePath) => {
    const chunk = getCodeChunk(filePath, startLine, lineCount);
    const ext = path.extname(chunk.filePath).slice(1) || "text";

    // Report the range actually shown so a truncated view is never mistaken
    // for the whole file.
    const showsWholeFile =
      chunk.startLine === 0 && chunk.endLine >= chunk.totalLines;
    const lineInfo = showsWholeFile
      ? ` (${chunk.totalLines} lines total)`
      : ` (lines ${chunk.startLine + 1}-${chunk.endLine} of ${chunk.totalLines})`;

    return `**${chunk.filePath}**${lineInfo}\nGitHub: <${chunk.githubUrl}>\n\`\`\`${ext}\n${chunk.content}\n\`\`\``;
  });

  return sections.join("\n\n---\n\n");
}
Use this to find function definitions, struct/type definitions, imports, error messages, or any code pattern.", + inputSchema: z.object({ + query: z + .string() + .describe( + "Regex pattern to search for in the codebase (example: 'fn create_session', 'struct Provider', 'impl.*Agent')", + ), + limit: z + .number() + .optional() + .describe("Maximum number of results to return (default 20)"), + scope: z + .string() + .optional() + .describe( + "Limit search to a specific area: 'ui' for the desktop and other UIs, 'crates' for Rust backend code. Omit to search everything.", + ), + }), + execute: async ({ query, limit = 20, scope }) => { + try { + const results = searchCodebase(query, limit, scope); + + if (results.length === 0) { + return "No matches found in the codebase. Try a different pattern or broader search."; + } + + return results + .map( + (r) => `**${r.filePath}:${r.line}**\n\`\`\`\n${r.context}\n\`\`\``, + ) + .join("\n\n"); + } catch (error) { + const errorMsg = + error instanceof Error ? error.message : "Unknown error"; + logger.error(`Error searching codebase: ${errorMsg}`); + return `Error searching codebase: ${errorMsg}`; + } + }, + }), + view_codebase: tool({ + description: + "View source code file(s) from the goose codebase. Paths are relative to the repository root (e.g., 'crates/goose/src/agents/agent.rs' or 'ui/desktop/src/App.tsx').", + inputSchema: z.object({ + filePaths: z + .union([z.string(), z.array(z.string())]) + .describe( + "Path or array of paths to source files relative to the repo root (example: 'crates/goose/src/agents/agent.rs' or ['ui/desktop/src/main.ts', 'crates/goose-server/src/main.rs'])", + ), + startLine: z + .number() + .optional() + .describe("Starting line number (0-indexed, default 0)"), + lineCount: z + .number() + .optional() + .describe( + "Number of lines to show (default 200). 
Use smaller values for focused reading, larger for overview.", + ), + }), + execute: async ({ filePaths, startLine = 0, lineCount = 200 }) => { + try { + const result = viewCodebaseFiles(filePaths, startLine, lineCount); + const count = Array.isArray(filePaths) ? filePaths.length : 1; + logger.verbose(`Viewed ${count} codebase file(s)`); + return result; + } catch (error) { + const errorMsg = + error instanceof Error ? error.message : "Unknown error"; + logger.error(`Error viewing codebase: ${errorMsg}`); + return `Error viewing codebase: ${errorMsg}`; + } + }, + }), + list_codebase_files: tool({ + description: + "List files and directories in a codebase directory. Use this to explore the project structure before viewing specific files. Only works within ui/ and crates/.", + inputSchema: z.object({ + directory: z + .string() + .describe( + "Directory path relative to repo root (example: 'crates/goose/src', 'ui/desktop/src/components')", + ), + }), + execute: async ({ directory }) => { + try { + const entries = listCodebaseFiles(directory); + + if (entries.length === 0) { + return `Directory "${directory}" is empty.`; + } + + return entries + .map((e) => `${e.isDirectory ? "[dir] " : " "}${e.filePath}`) + .join("\n"); + } catch (error) { + const errorMsg = + error instanceof Error ? error.message : "Unknown error"; + logger.error(`Error listing codebase files: ${errorMsg}`); + return `Error listing files: ${errorMsg}`; + } + }, + }), };