-
Notifications
You must be signed in to change notification settings - Fork 6
t230: Prune duplicate FAILURE_PATTERN memories and prevent recurrence #954
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -1730,6 +1730,147 @@ | |||||||||
| fi | ||||||||||
| } | ||||||||||
|
|
||||||||||
| ####################################### | ||||||||||
| # Prune repetitive pattern entries by keyword (t230) | ||||||||||
| # Consolidates entries where the same error/pattern keyword appears | ||||||||||
| # across many tasks, keeping only a few representative entries | ||||||||||
| ####################################### | ||||||||||
| cmd_prune_patterns() { | ||||||||||
| local keyword="" | ||||||||||
| local dry_run=false | ||||||||||
| local keep_count=3 | ||||||||||
| local types="FAILURE_PATTERN,ERROR_FIX,FAILED_APPROACH" | ||||||||||
|
|
||||||||||
| while [[ $# -gt 0 ]]; do | ||||||||||
| case "$1" in | ||||||||||
| --keyword) keyword="$2"; shift 2 ;; | ||||||||||
| --dry-run) dry_run=true; shift ;; | ||||||||||
| --keep) keep_count="$2"; shift 2 ;; | ||||||||||
| --types) types="$2"; shift 2 ;; | ||||||||||
| *) | ||||||||||
| if [[ -z "$keyword" ]]; then | ||||||||||
| keyword="$1" | ||||||||||
| fi | ||||||||||
| shift ;; | ||||||||||
| esac | ||||||||||
| done | ||||||||||
|
|
||||||||||
| if [[ -z "$keyword" ]]; then | ||||||||||
| log_error "Usage: memory-helper.sh prune-patterns <keyword> [--keep N] [--dry-run]" | ||||||||||
| log_error "Example: memory-helper.sh prune-patterns clean_exit_no_signal --keep 3" | ||||||||||
| return 1 | ||||||||||
| fi | ||||||||||
|
|
||||||||||
| # Validate keep_count is a positive integer | ||||||||||
| if ! [[ "$keep_count" =~ ^[1-9][0-9]*$ ]]; then | ||||||||||
| log_error "--keep must be a positive integer (got: $keep_count)" | ||||||||||
| return 1 | ||||||||||
| fi | ||||||||||
|
|
||||||||||
| init_db | ||||||||||
|
|
||||||||||
| # Build type filter SQL | ||||||||||
| local type_sql="" | ||||||||||
| local IFS=',' | ||||||||||
| local type_parts=() | ||||||||||
| read -ra type_parts <<< "$types" | ||||||||||
| unset IFS | ||||||||||
| local type_conditions=() | ||||||||||
| for t in "${type_parts[@]}"; do | ||||||||||
| type_conditions+=("'$t'") | ||||||||||
| done | ||||||||||
| type_sql=$(printf "%s," "${type_conditions[@]}") | ||||||||||
| type_sql="${type_sql%,}" | ||||||||||
|
|
||||||||||
| local escaped_keyword="${keyword//"'"/"''"}" | ||||||||||
|
|
||||||||||
| # Count matching entries | ||||||||||
| local total_count | ||||||||||
| total_count=$(db "$MEMORY_DB" \ | ||||||||||
| "SELECT COUNT(*) FROM learnings WHERE type IN ($type_sql) AND content LIKE '%${escaped_keyword}%';") | ||||||||||
|
|
||||||||||
| if [[ "$total_count" -le "$keep_count" ]]; then | ||||||||||
| log_success "Only $total_count entries match '$keyword' (keep=$keep_count). Nothing to prune." | ||||||||||
| return 0 | ||||||||||
| fi | ||||||||||
|
|
||||||||||
| local to_remove=$((total_count - keep_count)) | ||||||||||
| log_info "Found $total_count entries matching '$keyword' across types ($types)" | ||||||||||
| log_info "Will keep $keep_count newest entries, remove $to_remove" | ||||||||||
|
|
||||||||||
| if [[ "$dry_run" == true ]]; then | ||||||||||
| log_info "[DRY RUN] Would remove $to_remove entries. Entries to keep:" | ||||||||||
| db "$MEMORY_DB" <<EOF | ||||||||||
| SELECT id, type, substr(content, 1, 80) || '...' as preview, created_at | ||||||||||
| FROM learnings | ||||||||||
| WHERE type IN ($type_sql) AND content LIKE '%${escaped_keyword}%' | ||||||||||
| ORDER BY created_at DESC | ||||||||||
| LIMIT $keep_count; | ||||||||||
| EOF | ||||||||||
| echo "" | ||||||||||
| log_info "[DRY RUN] Sample entries to remove:" | ||||||||||
| db "$MEMORY_DB" <<EOF | ||||||||||
| SELECT id, type, substr(content, 1, 80) || '...' as preview, created_at | ||||||||||
| FROM learnings | ||||||||||
| WHERE type IN ($type_sql) AND content LIKE '%${escaped_keyword}%' | ||||||||||
| ORDER BY created_at DESC | ||||||||||
| LIMIT 5 OFFSET $keep_count; | ||||||||||
| EOF | ||||||||||
| return 0 | ||||||||||
| fi | ||||||||||
|
|
||||||||||
| # Backup before bulk delete | ||||||||||
| local prune_backup | ||||||||||
| prune_backup=$(backup_sqlite_db "$MEMORY_DB" "pre-prune-patterns") | ||||||||||
| if [[ $? -ne 0 || -z "$prune_backup" ]]; then | ||||||||||
| log_warn "Backup failed before prune-patterns — proceeding cautiously" | ||||||||||
| fi | ||||||||||
|
|
||||||||||
| # Get IDs to keep (newest N per type combination) | ||||||||||
| local keep_ids | ||||||||||
| keep_ids=$(db "$MEMORY_DB" <<EOF | ||||||||||
| SELECT id FROM learnings | ||||||||||
| WHERE type IN ($type_sql) AND content LIKE '%${escaped_keyword}%' | ||||||||||
| ORDER BY created_at DESC | ||||||||||
| LIMIT $keep_count; | ||||||||||
| EOF | ||||||||||
| ) | ||||||||||
|
|
||||||||||
| # Build exclusion list | ||||||||||
| local exclude_sql="" | ||||||||||
| while IFS= read -r kid; do | ||||||||||
| [[ -z "$kid" ]] && continue | ||||||||||
| local kid_esc="${kid//"'"/"''"}" | ||||||||||
| if [[ -z "$exclude_sql" ]]; then | ||||||||||
| exclude_sql="'$kid_esc'" | ||||||||||
| else | ||||||||||
| exclude_sql="$exclude_sql,'$kid_esc'" | ||||||||||
| fi | ||||||||||
| done <<< "$keep_ids" | ||||||||||
|
|
||||||||||
| # Delete matching entries except the ones we're keeping | ||||||||||
| local delete_where="type IN ($type_sql) AND content LIKE '%${escaped_keyword}%'" | ||||||||||
| if [[ -n "$exclude_sql" ]]; then | ||||||||||
| delete_where="$delete_where AND id NOT IN ($exclude_sql)" | ||||||||||
| fi | ||||||||||
|
|
||||||||||
| # Clean up relations first | ||||||||||
| db "$MEMORY_DB" "DELETE FROM learning_relations WHERE id IN (SELECT id FROM learnings WHERE $delete_where);" 2>/dev/null || true | ||||||||||
| db "$MEMORY_DB" "DELETE FROM learning_relations WHERE supersedes_id IN (SELECT id FROM learnings WHERE $delete_where);" 2>/dev/null || true | ||||||||||
|
Comment on lines
+1858
to
+1859
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The use of
Suggested change
References
|
||||||||||
| db "$MEMORY_DB" "DELETE FROM learning_access WHERE id IN (SELECT id FROM learnings WHERE $delete_where);" | ||||||||||
| db "$MEMORY_DB" "DELETE FROM learnings WHERE $delete_where;" | ||||||||||
|
|
||||||||||
| # Rebuild FTS index | ||||||||||
| db "$MEMORY_DB" "INSERT INTO learnings(learnings) VALUES('rebuild');" | ||||||||||
|
|
||||||||||
| log_success "Pruned $to_remove repetitive '$keyword' entries (kept $keep_count newest)" | ||||||||||
|
|
||||||||||
| # Clean up old backups | ||||||||||
| cleanup_sqlite_backups "$MEMORY_DB" 5 | ||||||||||
|
|
||||||||||
| return 0 | ||||||||||
| } | ||||||||||
|
|
||||||||||
| ####################################### | ||||||||||
| # Export memories | ||||||||||
| ####################################### | ||||||||||
|
|
@@ -2110,6 +2251,7 @@ | |||||||||
| stats Show memory statistics | ||||||||||
| validate Check for stale/duplicate entries (with detailed reports) | ||||||||||
| prune Remove old entries (auto-runs every 24h on store) | ||||||||||
| prune-patterns Remove repetitive pattern entries by keyword (e.g., clean_exit_no_signal) | ||||||||||
| dedup Remove exact and near-duplicate entries | ||||||||||
| consolidate Merge similar memories to reduce redundancy | ||||||||||
| export Export all memories | ||||||||||
|
|
@@ -2167,6 +2309,12 @@ | |||||||||
| --dry-run Show what would be deleted | ||||||||||
| --include-accessed Also prune accessed entries | ||||||||||
|
|
||||||||||
| PRUNE-PATTERNS OPTIONS: | ||||||||||
| <keyword> Error/pattern keyword to match (required) | ||||||||||
| --keep <n> Number of newest entries to keep (default: 3) | ||||||||||
| --types <list> Comma-separated types to search (default: FAILURE_PATTERN,ERROR_FIX,FAILED_APPROACH) | ||||||||||
| --dry-run Show what would be removed without deleting | ||||||||||
|
|
||||||||||
| DEDUP OPTIONS: | ||||||||||
| --dry-run Show what would be removed without deleting | ||||||||||
| --exact-only Only remove exact duplicates (skip near-duplicates) | ||||||||||
|
|
@@ -2321,6 +2469,7 @@ | |||||||||
| stats) cmd_stats ;; | ||||||||||
| validate) cmd_validate ;; | ||||||||||
| prune) cmd_prune "$@" ;; | ||||||||||
| prune-patterns) cmd_prune_patterns "$@" ;; | ||||||||||
| dedup) cmd_dedup "$@" ;; | ||||||||||
| consolidate) cmd_consolidate "$@" ;; | ||||||||||
| export) cmd_export "$@" ;; | ||||||||||
|
|
||||||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -10032,8 +10032,10 @@ store_failure_pattern() { | |||||||||||||
| retry) | ||||||||||||||
| # Only store retry patterns if they indicate a recurring issue | ||||||||||||||
| # Skip transient ones like rate_limited, timeout, interrupted | ||||||||||||||
| # Skip clean_exit_no_signal retries — infrastructure noise (t230) | ||||||||||||||
| # The blocked/failed outcomes above still capture the final state | ||||||||||||||
| case "$outcome_detail" in | ||||||||||||||
| rate_limited|timeout|interrupted_sigint|killed_sigkill|terminated_sigterm) | ||||||||||||||
| rate_limited|timeout|interrupted_sigint|killed_sigkill|terminated_sigterm|clean_exit_no_signal) | ||||||||||||||
| return 0 | ||||||||||||||
| ;; | ||||||||||||||
| esac | ||||||||||||||
|
|
@@ -10043,6 +10045,21 @@ store_failure_pattern() { | |||||||||||||
| ;; | ||||||||||||||
| esac | ||||||||||||||
|
|
||||||||||||||
| # Rate-limit: skip if 3+ entries with the same outcome_detail exist in last 24h (t230) | ||||||||||||||
| # Prevents memory pollution from repetitive infrastructure failures | ||||||||||||||
| local recent_count=0 | ||||||||||||||
| local escaped_detail | ||||||||||||||
| escaped_detail="$(sql_escape "$outcome_detail")" | ||||||||||||||
| if [[ -r "$MEMORY_DB" ]]; then | ||||||||||||||
| recent_count=$(sqlite3 "$MEMORY_DB" \ | ||||||||||||||
| "SELECT COUNT(*) FROM learnings WHERE type = 'FAILURE_PATTERN' AND content LIKE '%${escaped_detail}%' AND created_at > datetime('now', '-1 day');" \ | ||||||||||||||
| 2>/dev/null || echo "0") | ||||||||||||||
|
Comment on lines
+10054
to
+10056
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The use of
Suggested change
References
Comment on lines
+10048
to
+10056
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Harden the rate‑limit query against LIKE wildcard over‑matching. Line 10055: 🔧 Suggested fix (avoid LIKE wildcards)- local escaped_detail
- escaped_detail="$(sql_escape "$outcome_detail")"
- if [[ -r "$MEMORY_DB" ]]; then
- recent_count=$(sqlite3 "$MEMORY_DB" \
- "SELECT COUNT(*) FROM learnings WHERE type = 'FAILURE_PATTERN' AND content LIKE '%${escaped_detail}%' AND created_at > datetime('now', '-1 day');" \
- 2>/dev/null || echo "0")
- fi
+ local escaped_detail
+ escaped_detail="$(sql_escape "$outcome_detail")"
+ if [[ -r "$MEMORY_DB" && -n "$escaped_detail" ]]; then
+ recent_count=$(sqlite3 "$MEMORY_DB" \
+ "SELECT COUNT(*) FROM learnings WHERE type = 'FAILURE_PATTERN' AND instr(content, '${escaped_detail}') > 0 AND created_at > datetime('now', '-1 day');" \
+ 2>/dev/null || echo "0")
+ fi🤖 Prompt for AI Agents |
||||||||||||||
| fi | ||||||||||||||
| if [[ "$recent_count" -ge 3 ]]; then | ||||||||||||||
| log_info "Skipping failure pattern storage: $outcome_detail already has $recent_count entries in last 24h (t230)" | ||||||||||||||
| return 0 | ||||||||||||||
| fi | ||||||||||||||
|
|
||||||||||||||
| # Look up model tier from task record for pattern routing (t102.3) | ||||||||||||||
| local model_tier="" | ||||||||||||||
| local task_model | ||||||||||||||
|
|
||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CRITICAL: SQL injection vulnerability via unvalidated --types parameter.
The types from the
--typesparameter are wrapped in quotes but never validated againstVALID_TYPESbefore being used in SQL queries. An attacker could inject arbitrary SQL by providing a malicious type value.Example attack:
--types "X',DROP TABLE learnings--"Would generate:
type IN ('X',DROP TABLE learnings--')🔒 Proposed fix: Validate types before SQL use
type_sql="${type_sql%,}" + + # Validate all types against VALID_TYPES (prevent SQL injection) + for t in "${type_parts[@]}"; do + local type_pattern=" $t " + if [[ ! " $VALID_TYPES " =~ $type_pattern ]]; then + log_error "Invalid type in --types: '$t'" + log_error "Valid types: $VALID_TYPES" + return 1 + fi + done local escaped_keyword="${keyword//"'"/"''"}"This validation ensures that only known, safe type values are used in SQL queries, preventing injection attacks.
As per coding guidelines: "Security issues such as authentication/authorization flaws, injection (SQL/NoSQL)" must be addressed.
📝 Committable suggestion
🧰 Tools
🪛 GitHub Check: Codacy Static Code Analysis
[warning] 1774-1774: .agents/scripts/memory-helper.sh#L1774
The special variable IFS affects how splitting takes place when expanding unquoted variables.
🤖 Prompt for AI Agents