diff --git a/packages/opencode/src/tool/bash.ts b/packages/opencode/src/tool/bash.ts index bf7c524941f..dd3d7c14121 100644 --- a/packages/opencode/src/tool/bash.ts +++ b/packages/opencode/src/tool/bash.ts @@ -58,7 +58,7 @@ export const BashTool = Tool.define("bash", async () => { return { description: DESCRIPTION.replaceAll("${directory}", Instance.directory) .replaceAll("${maxLines}", String(Truncate.MAX_LINES)) - .replaceAll("${maxBytes}", String(Truncate.MAX_BYTES)), + .replaceAll("${maxTokens}", String(Truncate.MAX_TOKENS)), parameters: z.object({ command: z.string().describe("The command to execute"), timeout: z.number().describe("Optional timeout in milliseconds").optional(), diff --git a/packages/opencode/src/tool/bash.txt b/packages/opencode/src/tool/bash.txt index 9fbc9fcf37e..71bab99afd1 100644 --- a/packages/opencode/src/tool/bash.txt +++ b/packages/opencode/src/tool/bash.txt @@ -24,7 +24,7 @@ Usage notes: - The command argument is required. - You can specify an optional timeout in milliseconds. If not specified, commands will time out after 120000ms (2 minutes). - It is very helpful if you write a clear, concise description of what this command does in 5-10 words. - - If the output exceeds ${maxLines} lines or ${maxBytes} bytes, it will be truncated and the full output will be written to a file. You can use Read with offset/limit to read specific sections or Grep to search the full content. Because of this, you do NOT need to use `head`, `tail`, or other truncation commands to limit output - just run the command directly. + - If the output exceeds ${maxLines} lines or ${maxTokens} tokens, it will be truncated and the full output will be written to a file. You can use Read with offset/limit to read specific sections or Grep to search the full content. Because of this, you do NOT need to use `head`, `tail`, or other truncation commands to limit output - just run the command directly. - Avoid using Bash with the `find`, `grep`, `cat`, `head`, `tail`, `sed`, `awk`, or `echo` commands, unless explicitly instructed or when these commands are truly necessary for the task. Instead, always prefer using the dedicated tools for these commands: - File search: Use Glob (NOT find or ls) diff --git a/packages/opencode/src/tool/read.ts b/packages/opencode/src/tool/read.ts index 3b1484cbc0f..8b26b7a3753 100644 --- a/packages/opencode/src/tool/read.ts +++ b/packages/opencode/src/tool/read.ts @@ -8,10 +8,11 @@ import DESCRIPTION from "./read.txt" import { Instance } from "../project/instance" import { Identifier } from "../id/id" import { assertExternalDirectory } from "./external-directory" +import { Token } from "../util/token" const DEFAULT_READ_LIMIT = 2000 const MAX_LINE_LENGTH = 2000 -const MAX_BYTES = 50 * 1024 +const MAX_TOKENS = 10_000 export const ReadTool = Tool.define("read", { description: DESCRIPTION, @@ -94,17 +95,17 @@ export const ReadTool = Tool.define("read", { const lines = await file.text().then((text) => text.split("\n")) const raw: string[] = [] - let bytes = 0 - let truncatedByBytes = false + let tokens = 0 + let truncatedByTokens = false for (let i = offset; i < Math.min(lines.length, offset + limit); i++) { const line = lines[i].length > MAX_LINE_LENGTH ? lines[i].substring(0, MAX_LINE_LENGTH) + "..." : lines[i] - const size = Buffer.byteLength(line, "utf-8") + (raw.length > 0 ? 1 : 0) - if (bytes + size > MAX_BYTES) { - truncatedByBytes = true + const size = Token.estimate(line) + (raw.length > 0 ? 1 : 0) + if (tokens + size > MAX_TOKENS) { + truncatedByTokens = true break } raw.push(line) - bytes += size + tokens += size } const content = raw.map((line, index) => { @@ -118,10 +119,10 @@ export const ReadTool = Tool.define("read", { const totalLines = lines.length const lastReadLine = offset + raw.length const hasMoreLines = totalLines > lastReadLine - const truncated = hasMoreLines || truncatedByBytes + const truncated = hasMoreLines || truncatedByTokens - if (truncatedByBytes) { - output += `\n\n(Output truncated at ${MAX_BYTES} bytes. Use 'offset' parameter to read beyond line ${lastReadLine})` + if (truncatedByTokens) { + output += `\n\n(Output truncated at ${MAX_TOKENS} tokens. Use 'offset' parameter to read beyond line ${lastReadLine})` } else if (hasMoreLines) { output += `\n\n(File has more lines. Use 'offset' parameter to read beyond line ${lastReadLine})` } else { diff --git a/packages/opencode/src/tool/truncation.ts b/packages/opencode/src/tool/truncation.ts index 84e799c1310..61dc4079a59 100644 --- a/packages/opencode/src/tool/truncation.ts +++ b/packages/opencode/src/tool/truncation.ts @@ -3,12 +3,13 @@ import path from "path" import { Global } from "../global" import { Identifier } from "../id/id" import { PermissionNext } from "../permission/next" +import { Token } from "../util/token" import type { Agent } from "../agent/agent" import { Scheduler } from "../scheduler" export namespace Truncate { export const MAX_LINES = 2000 - export const MAX_BYTES = 50 * 1024 + export const MAX_TOKENS = 10_000 export const DIR = path.join(Global.Path.data, "tool-output") export const GLOB = path.join(DIR, "*") const RETENTION_MS = 7 * 24 * 60 * 60 * 1000 // 7 days @@ -18,7 +19,7 @@ export namespace Truncate { export interface Options { maxLines?: number - maxBytes?: number + maxTokens?: number direction?: "head" | "tail" } @@ -49,44 +50,44 @@ export namespace Truncate { export async function output(text: string, options: Options = {}, agent?: Agent.Info): Promise { const maxLines = options.maxLines ?? MAX_LINES - const maxBytes = options.maxBytes ?? MAX_BYTES + const maxTokens = options.maxTokens ?? MAX_TOKENS const direction = options.direction ?? "head" const lines = text.split("\n") - const totalBytes = Buffer.byteLength(text, "utf-8") + const totalTokens = Token.estimate(text) - if (lines.length <= maxLines && totalBytes <= maxBytes) { + if (lines.length <= maxLines && totalTokens <= maxTokens) { return { content: text, truncated: false } } const out: string[] = [] let i = 0 - let bytes = 0 - let hitBytes = false + let tokens = 0 + let hitTokens = false if (direction === "head") { for (i = 0; i < lines.length && i < maxLines; i++) { - const size = Buffer.byteLength(lines[i], "utf-8") + (i > 0 ? 1 : 0) - if (bytes + size > maxBytes) { - hitBytes = true + const size = Token.estimate(lines[i]) + (out.length > 0 ? 1 : 0) + if (tokens + size > maxTokens) { + hitTokens = true break } out.push(lines[i]) - bytes += size + tokens += size } } else { for (i = lines.length - 1; i >= 0 && out.length < maxLines; i--) { - const size = Buffer.byteLength(lines[i], "utf-8") + (out.length > 0 ? 1 : 0) - if (bytes + size > maxBytes) { - hitBytes = true + const size = Token.estimate(lines[i]) + (out.length > 0 ? 1 : 0) + if (tokens + size > maxTokens) { + hitTokens = true break } out.unshift(lines[i]) - bytes += size + tokens += size } } - const removed = hitBytes ? totalBytes - bytes : lines.length - out.length - const unit = hitBytes ? "bytes" : "lines" + const removed = hitTokens ? totalTokens - tokens : lines.length - out.length + const unit = hitTokens ? "tokens" : "lines" const preview = out.join("\n") const id = Identifier.ascending("tool") diff --git a/packages/opencode/test/tool/bash.test.ts b/packages/opencode/test/tool/bash.test.ts index 750ff8193e9..e8749bc5540 100644 --- a/packages/opencode/test/tool/bash.test.ts +++ b/packages/opencode/test/tool/bash.test.ts @@ -253,16 +253,17 @@ describe("tool.bash truncation", () => { }) }) - test("truncates output exceeding byte limit", async () => { + test("truncates output exceeding token limit", async () => { await Instance.provide({ directory: projectRoot, fn: async () => { const bash = await BashTool.init() - const byteCount = Truncate.MAX_BYTES + 10000 + const tokenCount = Truncate.MAX_TOKENS + 1000 + const byteCount = tokenCount * 4 const result = await bash.execute( { command: `head -c ${byteCount} /dev/zero | tr '\\0' 'a'`, - description: "Generate bytes exceeding limit", + description: "Generate tokens exceeding limit", }, ctx, ) diff --git a/packages/opencode/test/tool/truncation.test.ts b/packages/opencode/test/tool/truncation.test.ts index 09222f279fa..3551a8bd930 100644 --- a/packages/opencode/test/tool/truncation.test.ts +++ b/packages/opencode/test/tool/truncation.test.ts @@ -1,6 +1,7 @@ import { describe, test, expect, afterAll } from "bun:test" import { Truncate } from "../../src/tool/truncation" import { Identifier } from "../../src/id/id" +import { Token } from "../../src/util/token" import fs from "fs/promises" import path from "path" @@ -8,7 +9,7 @@ const FIXTURES_DIR = path.join(import.meta.dir, "fixtures") describe("Truncate", () => { describe("output", () => { - test("truncates large json file by bytes", async () => { + test("truncates large json file by tokens", async () => { const content = await Bun.file(path.join(FIXTURES_DIR, "models-api.json")).text() const result = await Truncate.output(content) @@ -33,9 +34,9 @@ describe("Truncate", () => { expect(result.content).toContain("...90 lines truncated...") }) - test("truncates by byte count", async () => { + test("truncates by token count", async () => { const content = "a".repeat(1000) - const result = await Truncate.output(content, { maxBytes: 100 }) + const result = await Truncate.output(content, { maxTokens: 100 }) expect(result.truncated).toBe(true) expect(result.content).toContain("truncated...") @@ -63,18 +64,18 @@ describe("Truncate", () => { expect(result.content).not.toContain("line0") }) - test("uses default MAX_LINES and MAX_BYTES", () => { + test("uses default MAX_LINES and MAX_TOKENS", () => { expect(Truncate.MAX_LINES).toBe(2000) - expect(Truncate.MAX_BYTES).toBe(50 * 1024) + expect(Truncate.MAX_TOKENS).toBe(10_000) }) - test("large single-line file truncates with byte message", async () => { + test("large single-line file truncates with token message", async () => { const content = await Bun.file(path.join(FIXTURES_DIR, "models-api.json")).text() const result = await Truncate.output(content) expect(result.truncated).toBe(true) - expect(result.content).toContain("bytes truncated...") - expect(Buffer.byteLength(content, "utf-8")).toBeGreaterThan(Truncate.MAX_BYTES) + expect(result.content).toContain("tokens truncated...") + expect(Token.estimate(content)).toBeGreaterThan(Truncate.MAX_TOKENS) }) test("writes full output to file when truncated", async () => {