Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 34 additions & 8 deletions .github/workflows/agents-auto-label.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,10 @@ jobs:
sys.exit(0)

# Build vector store
label_records = [LabelRecord(name=l['name'], description=l['description']) for l in labels]
label_records = [
LabelRecord(name=l['name'], description=l['description'])
for l in labels
]
store = build_label_vector_store(label_records)

if store is None:
Expand All @@ -128,6 +131,16 @@ jobs:
auto_apply = [m for m in matches if m.score >= auto_threshold]
suggestions = [m for m in matches if suggest_threshold <= m.score < auto_threshold]

# IMPORTANT: Only auto-apply the BEST matching label, not all above threshold
# This prevents over-labeling issues with multiple labels like bug+enhancement
if auto_apply:
best_match = auto_apply[0] # matches are already sorted by score descending
auto_apply = [best_match]
# Move other high-confidence matches to suggestions
for m in matches[1:]:
if m.score >= auto_threshold and m not in suggestions:
suggestions.insert(0, m)
Comment on lines 131 to +142
Copy link

Copilot AI Jan 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic to move other high-confidence matches to suggestions has two weaknesses. The guard m not in suggestions is dead code: suggestions only contains matches with scores below auto_threshold (line 132), while this loop only considers matches with scores >= auto_threshold, so the condition is always true and no duplicates are actually possible. Additionally, iterating over matches[1:] and re-checking m.score >= auto_threshold is redundant, since the original auto_apply list already contained exactly the matches above the threshold — and calling suggestions.insert(0, m) once per match prepends them in reverse score order.

Suggested change
auto_apply = [m for m in matches if m.score >= auto_threshold]
suggestions = [m for m in matches if suggest_threshold <= m.score < auto_threshold]
# IMPORTANT: Only auto-apply the BEST matching label, not all above threshold
# This prevents over-labeling issues with multiple labels like bug+enhancement
if auto_apply:
best_match = auto_apply[0] # matches are already sorted by score descending
auto_apply = [best_match]
# Move other high-confidence matches to suggestions
for m in matches[1:]:
if m.score >= auto_threshold and m not in suggestions:
suggestions.insert(0, m)
auto_apply_full = [m for m in matches if m.score >= auto_threshold]
auto_apply = list(auto_apply_full)
suggestions = [m for m in matches if suggest_threshold <= m.score < auto_threshold]
# IMPORTANT: Only auto-apply the BEST matching label, not all above threshold
# This prevents over-labeling issues with multiple labels like bug+enhancement
if auto_apply:
best_match = auto_apply[0] # matches are already sorted by score descending
# Keep only the best match for auto-apply
auto_apply = [best_match]
# Move other high-confidence matches (remaining auto_apply) to suggestions
other_high = auto_apply_full[1:] if 'auto_apply_full' in locals() else []
for m in reversed(other_high):
suggestions.insert(0, m)

Copilot uses AI. Check for mistakes.

print(f"Auto-apply labels ({auto_threshold}+ confidence):")
for m in auto_apply:
print(f" - {m.label.name}: {m.score:.2%}")
Expand All @@ -139,13 +152,20 @@ jobs:
# Output results
with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
f.write('has_suggestions=true\n')
f.write(f'auto_apply_labels={json.dumps([m.label.name for m in auto_apply])}\n')
f.write(f'suggested_labels={json.dumps([{"name": m.label.name, "score": f"{m.score:.0%}"} for m in suggestions])}\n')
auto_json = json.dumps([m.label.name for m in auto_apply])
f.write(f'auto_apply_labels={auto_json}\n')
sugg_data = [
{"name": m.label.name, "score": f"{m.score:.0%}"}
for m in suggestions
]
f.write(f'suggested_labels={json.dumps(sugg_data)}\n')

PYTHON_SCRIPT

- name: Apply high-confidence labels
if: steps.match.outputs.has_suggestions == 'true' && steps.match.outputs.auto_apply_labels != '[]'
if: |
steps.match.outputs.has_suggestions == 'true' &&
steps.match.outputs.auto_apply_labels != '[]'
uses: actions/github-script@v8
with:
script: |
Expand Down Expand Up @@ -182,7 +202,9 @@ jobs:
core.info(`Applied labels: ${newLabels.join(', ')}`);

- name: Post suggestion comment
if: steps.match.outputs.has_suggestions == 'true' && steps.match.outputs.suggested_labels != '[]'
if: |
steps.match.outputs.has_suggestions == 'true' &&
steps.match.outputs.suggested_labels != '[]'
uses: actions/github-script@v8
with:
script: |
Expand All @@ -199,15 +221,19 @@ jobs:
.map(l => `- \`${l.name}\` (${l.score} confidence)`)
.join('\n');

let body = `### 🏷️ Label Suggestions\n\nBased on the issue content, these labels might be relevant:\n\n${suggestions}\n\n`;
let body = `### 🏷️ Label Suggestions\n\n`;
body += `Based on the issue content, these labels might be relevant:\n\n`;
body += `${suggestions}\n\n`;

if (autoApplied.length > 0) {
body += `**Auto-applied:** ${autoApplied.map(l => `\`${l}\``).join(', ')}\n\n`;
const applied = autoApplied.map(l => `\`${l}\``).join(', ');
body += `**Auto-applied:** ${applied}\n\n`;
}

body += `<details>\n<summary>How to use these suggestions</summary>\n\n`;
body += `- Click the label name in the sidebar to add it\n`;
body += `- Or use the GitHub CLI: \`gh issue edit ${context.issue.number} --add-label "label-name"\`\n`;
const editCmd = `gh issue edit ${context.issue.number} --add-label "label-name"`;
body += `- Or use the GitHub CLI: \`${editCmd}\`\n`;
body += `</details>\n\n`;
body += `---\n*Auto-generated by label matcher*`;

Expand Down
29 changes: 26 additions & 3 deletions .github/workflows/agents-dedup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ permissions:

env:
# Similarity threshold for flagging duplicates (0.0-1.0)
# 0.85 = very similar, reduces false positives
SIMILARITY_THRESHOLD: "0.85"
# 0.92 = very high similarity required, reduces false positives from
# issues in the same domain/feature area that share vocabulary
SIMILARITY_THRESHOLD: "0.92"

jobs:
dedup:
Expand Down Expand Up @@ -122,9 +123,31 @@ jobs:
new_body = os.environ.get('NEW_ISSUE_BODY', '')
query = f'{new_title}\n\n{new_body}'

threshold = float(os.environ.get('SIMILARITY_THRESHOLD', '0.85'))
threshold = float(os.environ.get('SIMILARITY_THRESHOLD', '0.92'))
matches = find_similar_issues(store, query, threshold=threshold, k=3)

# Additional filter: require title similarity for true duplicates
# This reduces false positives from issues in the same domain/feature area
# that share vocabulary but are different tasks
filtered_matches = []
new_title_lower = new_title.lower().strip()
for m in matches:
match_title_lower = m.issue.title.lower().strip()
# Check for significant title overlap
title_words_new = set(new_title_lower.split())
title_words_match = set(match_title_lower.split())
shared_words = title_words_new.intersection(title_words_match)
# Require at least 40% of words to overlap for a duplicate flag
max_words = max(len(title_words_new), len(title_words_match), 1)
overlap_ratio = len(shared_words) / max_words
Comment on lines +140 to +142
Copy link

Copilot AI Jan 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The overlap ratio calculation uses the maximum of the two title lengths as the denominator, but this may not accurately represent the degree of overlap between titles. Consider using the minimum length instead, or a Jaccard similarity metric (intersection over union).

For example, if a new issue has title "Add caching" (2 words) and an existing issue has "Add PUT endpoint to update manager caching layer optimization" (9 words), and they share "Add" and "caching" (2 words), the current calculation gives 2/9 ≈ 22% overlap, which would miss this as a duplicate even if the body content is similar. Using min would give 2/2 = 100% overlap, which better reflects that all words in the shorter title are present in the longer one.

Suggested change
# Require at least 40% of words to overlap for a duplicate flag
max_words = max(len(title_words_new), len(title_words_match), 1)
overlap_ratio = len(shared_words) / max_words
# Require at least 40% of words in the shorter title to overlap for a duplicate flag
min_words = max(min(len(title_words_new), len(title_words_match)), 1)
overlap_ratio = len(shared_words) / min_words

Copilot uses AI. Check for mistakes.
if m.score >= 0.95 or overlap_ratio >= 0.4:
filtered_matches.append(m)
print(f' Match #{m.issue.number}: {m.score:.0%}, overlap={overlap_ratio:.0%}')
else:
print(f' Skip #{m.issue.number}: {m.score:.0%}, overlap={overlap_ratio:.0%}')

matches = filtered_matches

if not matches:
print('No duplicates found above threshold')
with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
Expand Down
Loading
Loading