diff --git a/.agent/AGENTS.md b/.agent/AGENTS.md index 806a233b..2bd88b61 100644 --- a/.agent/AGENTS.md +++ b/.agent/AGENTS.md @@ -109,6 +109,9 @@ Run pre-edit-check.sh in `~/Git/aidevops/` BEFORE any changes to either location **Critical Rules**: - **Git check before edits**: See "MANDATORY: Pre-Edit Git Check" section above +- **File discovery**: Use `git ls-files` or `fd`, NOT `mcp_glob` (see File Discovery below) +- **Context budget**: Never consume >100K tokens on a single operation; for remote repos: fetch README first, check size with `gh api`, use `includePatterns` +- **Agent capability check**: Before edits, verify you have Edit/Write/Bash tools; if not, suggest switching to Build+ - NEVER create files in `~/` root - use `~/.aidevops/.agent-workspace/work/[project]/` for files needed only with the current task. - NEVER expose credentials in output/logs - Confirm destructive operations before execution @@ -117,13 +120,18 @@ Run pre-edit-check.sh in `~/Git/aidevops/` BEFORE any changes to either location **Quality Standards**: SonarCloud A-grade, ShellCheck zero violations -**File Discovery** (fastest to slowest): -1. `git ls-files '*.md'` - Instant, git-tracked files only -2. `fd -e md` or `fd -g '*.md'` - Fast, respects .gitignore, Rust-based -3. `rg --files -g '*.md'` - Fast, respects .gitignore (ripgrep) -4. `mcp_glob` tool - Fallback when bash unavailable or for complex patterns +**File Discovery** (STOP before using `mcp_glob`): -Use `git ls-files` for tracked files (most common). Use `fd` for untracked files or system-wide searches (e.g., `~/.config/`). The `mcp_glob` tool is CPU-intensive on large codebases. +Self-check: "Am I about to use `mcp_glob`?" 
# Print "LINE:TEXT" for every fenced code block in a markdown file whose
# OPENING fence carries no language specifier (markdownlint rule MD040).
#
# Fence state is tracked so that closing fences - which are always bare -
# are not reported. A block is closed only by a bare fence at least as long
# as the one that opened it, so shorter fences nested inside a longer fence
# are treated as content. Only backtick fences are considered.
#
# Arguments: $1 - path to the markdown file
# Outputs:   one "LINE:TEXT" line per unlabelled opening fence on stdout
_markdown_unlabelled_fences() {
    local file=$1
    local line trimmed ticks info
    local lineno=0
    local open_len=0

    while IFS= read -r line || [[ -n "$line" ]]; do
        lineno=$((lineno + 1))

        # Strip leading whitespace, then isolate the leading run of backticks.
        trimmed="${line#"${line%%[![:space:]]*}"}"
        ticks="${trimmed%%[!\`]*}"
        if (( ${#ticks} < 3 )); then
            continue
        fi

        # Whatever follows the backticks (minus trailing whitespace) is the
        # info string, i.e. the language specifier.
        info="${trimmed#"$ticks"}"
        info="${info%"${info##*[![:space:]]}"}"

        if (( open_len > 0 )); then
            # Inside a block: only a bare, long-enough fence closes it.
            if [[ -z "$info" ]] && (( ${#ticks} >= open_len )); then
                open_len=0
            fi
            continue
        fi

        open_len=${#ticks}
        if [[ -z "$info" ]]; then
            printf '%s:%s\n' "$lineno" "$line"
        fi
    done < "$file"
}

# Check markdown style of the files most relevant to the current change.
#
# File selection (first non-empty result wins):
#   1. Uncommitted changes (staged + unstaged) relative to HEAD
#   2. Files changed on the current branch since the merge-base with
#      main (or master)
#   3. All tracked .md files under .agent/
#   Outside a git repository: every *.md below the current directory,
#   excluding node_modules.
# Paths that do not exist as regular files relative to the current directory
# are skipped, so deleted or renamed-away files never reach the linter.
#
# Uses markdownlint (from PATH or node_modules/.bin) when available; otherwise
# falls back to a basic check for fenced code blocks without a language.
#
# Relies on print_info / print_success / print_warning provided by the
# enclosing script.
#
# Returns: always 0 - findings are reported but intentionally non-blocking.
check_markdown_lint() {
    print_info "Checking Markdown Style..."

    local file_list=""
    local -a md_files=()
    local violations=0
    local markdownlint_cmd=""

    # Find markdownlint command
    if command -v markdownlint &> /dev/null; then
        markdownlint_cmd="markdownlint"
    elif [[ -f "node_modules/.bin/markdownlint" ]]; then
        markdownlint_cmd="node_modules/.bin/markdownlint"
    fi

    if git rev-parse --git-dir > /dev/null 2>&1; then
        # First try uncommitted changes
        file_list=$(git diff --name-only --diff-filter=ACMR HEAD -- '*.md' 2>/dev/null) || true

        # If no uncommitted, check branch diff vs main
        if [[ -z "$file_list" ]]; then
            local merge_base
            merge_base=$(git merge-base HEAD main 2>/dev/null || git merge-base HEAD master 2>/dev/null || true)
            if [[ -n "$merge_base" ]]; then
                # Same filter as above: never hand deleted files to the linter.
                file_list=$(git diff --name-only --diff-filter=ACMR "$merge_base" HEAD -- '*.md' 2>/dev/null) || true
            fi
        fi

        # Fallback: check all .agent/*.md files
        if [[ -z "$file_list" ]]; then
            file_list=$(git ls-files '.agent/**/*.md' 2>/dev/null) || true
        fi
    else
        file_list=$(find . -name "*.md" -type f 2>/dev/null | grep -v node_modules) || true
    fi

    # Turn the newline-separated list into an array so paths containing
    # spaces or glob characters survive intact (while-read instead of
    # mapfile, which older bash releases lack).
    local path
    while IFS= read -r path; do
        if [[ -n "$path" && -f "$path" ]]; then
            md_files+=("$path")
        fi
    done <<< "$file_list"

    if [[ ${#md_files[@]} -eq 0 ]]; then
        print_success "Markdown: No markdown files to check"
        return 0
    fi

    if [[ -n "$markdownlint_cmd" ]]; then
        # Run markdownlint and capture output; its exit status is ignored
        # because findings are judged from the output below.
        local lint_output
        lint_output=$("$markdownlint_cmd" "${md_files[@]}" 2>&1) || true

        if [[ -n "$lint_output" ]]; then
            # Count violations (each line naming an MDnnn rule is one).
            # grep -c already prints 0 when nothing matches (while exiting 1),
            # so only the status is neutralised - echoing another "0" would
            # produce a two-line value that breaks the numeric test.
            violations=$(grep -c "MD[0-9]" <<< "$lint_output" || true)
            violations=${violations:-0}

            if (( violations > 0 )); then
                print_warning "Markdown: $violations style issues found"
                head -10 <<< "$lint_output"
                if (( violations > 10 )); then
                    echo "... and $((violations - 10)) more"
                fi
                print_info "Run: markdownlint --fix .agent/**/*.md to auto-fix"
                # Non-blocking for now - many pre-existing issues
                # TODO: Make blocking after fixing existing issues
                return 0
            fi
        fi
        print_success "Markdown: No style issues found"
    else
        # Fallback: basic checks without markdownlint
        local issues=0
        local file hits count

        # Check for fenced code blocks without language (MD040)
        for file in "${md_files[@]}"; do
            hits=$(_markdown_unlabelled_fences "$file" 2>/dev/null) || true
            [[ -n "$hits" ]] || continue

            count=$(grep -c '' <<< "$hits")
            print_warning "$file: $count fenced code blocks without language specifier"
            head -3 <<< "$hits"
            issues=$((issues + count))
        done

        if (( issues > 0 )); then
            print_warning "Markdown: $issues issues found (install markdownlint for full checks)"
            print_info "Install: npm install -g markdownlint-cli"
            # Non-blocking for now - many pre-existing issues
            # TODO: Make blocking after fixing existing issues
            return 0
        fi

        print_success "Markdown: Basic checks passed (install markdownlint for full checks)"
    fi

    return 0
}
(for comprehensive formatting)" + + check_remote_cli_status echo "" # Final summary diff --git a/.agent/tools/build-agent/build-agent.md b/.agent/tools/build-agent/build-agent.md index 68226357..99e01e6e 100644 --- a/.agent/tools/build-agent/build-agent.md +++ b/.agent/tools/build-agent/build-agent.md @@ -636,6 +636,27 @@ This protocol should also be reviewed when: - User feedback indicates protocol is too aggressive/passive - Duplicate detection fails to catch conflicts +### Tool Selection Checklist + +Before using tools, verify you're using the optimal choice: + +| Task | Preferred Tool | Avoid | Why | +|------|---------------|-------|-----| +| Find files by pattern | `git ls-files` or `fd` | `mcp_glob` | CLI is 10x faster | +| Search file contents | `rg` (ripgrep) | `mcp_grep` | CLI is more powerful | +| Read file contents | `mcp_read` | `cat` via bash | Better error handling | +| Edit files | `mcp_edit` | `sed` via bash | Safer, atomic | +| Web content | `mcp_webfetch` | `curl` via bash | Handles redirects | +| Remote repo research | `mcp_webfetch` README first | `mcp_repomix_pack_remote_repository` | Prevents context overload | + +**Self-check prompt**: Before calling any MCP tool, ask: +> "Is there a faster CLI alternative I should use via Bash?" + +**Context budget check**: Before context-heavy operations, ask: +> "Could this return >50K tokens? Have I checked the size first?" + +See `tools/context/context-guardrails.md` for detailed guardrails. 
+ ### Agent File Structure Convention All agent files should follow this structure: diff --git a/.agent/tools/context/context-builder.md b/.agent/tools/context/context-builder.md index 3878d4ec..945c34ce 100644 --- a/.agent/tools/context/context-builder.md +++ b/.agent/tools/context/context-builder.md @@ -8,7 +8,7 @@ tools: bash: true glob: true grep: true - webfetch: false + webfetch: true task: true --- @@ -64,6 +64,30 @@ tools: - Import/export statements - Omits: implementation details, comments, empty lines +## CRITICAL: Remote Repository Guardrails + +**NEVER blindly pack a remote repository.** Follow this escalation: + +1. **Fetch README first** - `webfetch "https://github.com/{user}/{repo}"` (~1-5K tokens) +2. **Check repo size** - `gh api repos/{user}/{repo} --jq '.size'` (size in KB) +3. **Apply size thresholds**: + +| Repo Size (KB) | Est. Tokens | Action | +|----------------|-------------|--------| +| < 500 | < 50K | Safe for compressed pack | +| 500-2000 | 50-200K | Use `includePatterns` only | +| > 2000 | > 200K | **NEVER full pack** - targeted files only | + +4. **Use patterns** - `mcp_repomix_pack_remote_repository(..., includePatterns="README.md,src/**/*.ts")` + +**What NOT to do:** +```bash +# DANGEROUS - packs entire repo without size check +mcp_repomix_pack_remote_repository(remote="https://github.com/some/large-repo") +``` + +See `tools/context/context-guardrails.md` for full workflow and recovery procedures. 
+ ## Overview diff --git a/.agent/tools/context/context-guardrails.md b/.agent/tools/context/context-guardrails.md new file mode 100644 index 00000000..88561100 --- /dev/null +++ b/.agent/tools/context/context-guardrails.md @@ -0,0 +1,180 @@ +--- +description: Context budget management and guardrails for AI assistants +mode: subagent +tools: + read: true + bash: true + webfetch: true +--- + +# Context Guardrails + + + +## Quick Reference + +- **Purpose**: Prevent context overload from large operations +- **Budget**: Reserve 100K tokens for conversation; never use >100K on context +- **Key Rule**: README first, check size, use patterns + +**Size Thresholds**: + +| Repo Size (KB) | Est. Tokens | Action | +|----------------|-------------|--------| +| < 500 | < 50K | Safe for compressed pack | +| 500-2000 | 50-200K | Use `includePatterns` only | +| > 2000 | > 200K | **NEVER full pack** - targeted files only | + +**Self-check before context-heavy operations**: +> "Could this operation return >50K tokens? Have I checked the size first?" + + + +## The Problem + +Claude's context window is 200K tokens. Context-heavy operations can easily exceed this: + +| Tool | Typical Output | Risk Level | +|------|----------------|------------| +| `repomix_pack_remote_repository` | 100K - 5M+ tokens | **EXTREME** | +| `mcp_grep` on large output | 10K - 500K tokens | **HIGH** | +| `webfetch` on docs site | 5K - 50K tokens | Medium | +| `mcp_read` single file | 1K - 20K tokens | Low | + +## Golden Rules + +1. **Budget**: Reserve 100K tokens for conversation. Never use >100K on context. +2. **Escalate gradually**: README -> specific files -> targeted patterns -> full pack (last resort) +3. **Pre-flight checks**: Always check size before packing remote repos +4. **Post-check output**: If grep/search returns >500 lines, DON'T load it all + +## Remote Repository Research Workflow + +```text +START + | + v ++-------------------------------------+ +| 1. 
Fetch README via webfetch | ~1-5K tokens +| (Understand purpose & structure) | ++-------------------------------------+ + | + v ++-------------------------------------+ +| 2. Check repo size | +| gh api repos/{u}/{r} --jq .size | ++-------------------------------------+ + | + +-- < 500 KB --> Safe for compressed pack + | + +-- 500KB-2MB --> Use includePatterns only + | + +-- > 2MB --> STOP - targeted files only +``` + +## Size Estimation + +GitHub API `.size` is in KB. Rough token estimate: + +- **Repo KB x 100 = approximate full-pack tokens** (very rough, matches thresholds: 500KB -> ~50K tokens) +- **Compressed mode reduces by ~70-80%** +- **Targeted patterns can reduce by 90-99%** + +## Tool-Specific Guardrails + +### mcp_repomix_pack_remote_repository + +MCP tool name: `mcp_repomix_pack_remote_repository` +Helper script: `context-builder-helper.sh remote user/repo [branch]` + +```bash +# BAD - no size check, no patterns +mcp_repomix_pack_remote_repository(remote="https://github.com/large/repo") + +# GOOD - size check first +gh api repos/large/repo --jq '.size' # Check KB +# If < 500 KB: +mcp_repomix_pack_remote_repository(remote="https://github.com/small/repo", compress=true) +# If > 500 KB: +mcp_repomix_pack_remote_repository(remote="https://github.com/large/repo", includePatterns="README.md,src/**/*.ts,docs/**") + +# Or use helper script: +context-builder-helper.sh remote large/repo main # Auto-compresses +``` + +### mcp_grep on large outputs + +```bash +# BAD - grepping on potentially huge output +mcp_repomix_grep_repomix_output(outputId="...", pattern="install") + +# GOOD - limit context lines, be specific +mcp_repomix_grep_repomix_output(outputId="...", pattern="^## Install", contextLines=5) +# Or read specific line ranges after finding matches +mcp_repomix_read_repomix_output(outputId="...", startLine=100, endLine=200) +``` + +### webfetch on documentation sites + +```bash +# CAUTION - docs sites can be large +webfetch("https://docs.example.com/") # May 
return 50K+ tokens + +# BETTER - target specific pages +webfetch("https://docs.example.com/getting-started") +webfetch("https://raw.githubusercontent.com/user/repo/main/README.md") +``` + +## Recovery from Context Overflow + +If you hit "prompt is too long": + +1. **Start a new conversation** - Context cannot be reduced mid-session +2. **Ask user what specific question they have** - Focus on the actual need +3. **Use targeted approach** - Get only needed context +4. **Document the failure** - Use `/remember` for future sessions: + + ```text + /remember FAILED_APPROACH: Attempted to pack {repo} without size check. + Repo was {size}KB (~{tokens} tokens). Use includePatterns next time. + ``` + +## File Discovery Guardrails + +Before using `mcp_glob`, check if faster alternatives work: + +| Use Case | Preferred Tool | Fallback | +|----------|---------------|----------| +| Git-tracked files | `git ls-files '*.md'` | `mcp_glob` | +| Untracked files | `fd -e md` | `mcp_glob` | +| System-wide search | `fd -g '*.md' ~/.config/` | `mcp_glob` | + +**Why?** `mcp_glob` is CPU-intensive on large codebases. CLI tools are 10x faster. + +## Agent Capability Check + +Before attempting edits, verify you have the required tools: + +```text +Self-check: "Do I have Edit/Write/Bash tools for this task?" + +If NO (e.g., in Plan+ agent): + -> Suggest: "This task requires edits. Please switch to Build+ agent." + +If YES: + -> Proceed with pre-edit git check +``` + +## Integration with Other Guardrails + +This document complements: + +- **Pre-Edit Git Check** (AGENTS.md) - Branch safety before edits +- **File Discovery** (AGENTS.md) - Tool selection for file operations +- **Context Builder** (context-builder.md) - Token-efficient context generation + +## Related Documentation + +- `tools/context/context-builder.md` - Repomix wrapper for context generation +- `tools/context/context7.md` - External library documentation +- `tools/build-agent/build-agent.md` - Agent design principles