Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/sync-manifest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,15 @@ workflows:
- source: .github/workflows/agents-auto-label.yml
description: "Auto-label - suggests/applies labels based on semantic matching (Phase 5A)"

- source: .github/workflows/agents-capability-check.yml
description: "Capability check - pre-flight agent feasibility gate (Phase 3A)"

- source: .github/workflows/agents-decompose.yml
description: "Task decomposition - breaks large issues into sub-tasks (Phase 3B)"

- source: .github/workflows/agents-dedup.yml
description: "Duplicate detection - flags similar open issues (Phase 3C)"

Comment on lines +75 to +83
Copy link

Copilot AI Jan 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sync manifest references workflow files that don't exist in the source repository. The manifest lists agents-capability-check.yml, agents-decompose.yml, and agents-dedup.yml at lines 75, 78, and 81, but these files only exist in templates/consumer-repo/.github/workflows/ and not in .github/workflows/. The sync workflow will fail when trying to sync these files to consumer repos. Either create these workflows in .github/workflows/ or remove them from the sync manifest until they're ready.

Suggested change
- source: .github/workflows/agents-capability-check.yml
description: "Capability check - pre-flight agent feasibility gate (Phase 3A)"
- source: .github/workflows/agents-decompose.yml
description: "Task decomposition - breaks large issues into sub-tasks (Phase 3B)"
- source: .github/workflows/agents-dedup.yml
description: "Duplicate detection - flags similar open issues (Phase 3C)"

Copilot uses AI. Check for mistakes.
- source: .github/workflows/agents-guard.yml
description: "Agents guard - enforces agents workflow protections (Health 45)"

Expand Down
699 changes: 586 additions & 113 deletions docs/plans/langchain-post-code-rollout.md

Large diffs are not rendered by default.

36 changes: 36 additions & 0 deletions scripts/langchain/pr_verifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,14 @@ def _parse_llm_response(content: str, provider: str) -> EvaluationResult:
)


def _is_auth_error(exc: Exception) -> bool:
"""Check if an exception is an authentication/authorization error."""
exc_str = str(exc).lower()
# Common auth error patterns from various LLM APIs
auth_patterns = ["401", "unauthorized", "forbidden", "403", "permission", "authentication"]
return any(pattern in exc_str for pattern in auth_patterns)


def evaluate_pr(
context: str,
diff: str | None = None,
Expand All @@ -484,6 +492,34 @@ def evaluate_pr(
try:
response = client.invoke(prompt)
except Exception as exc: # pragma: no cover - exercised in integration
# If auth error and not explicitly requesting a provider, try fallback
if _is_auth_error(exc) and provider is None:
fallback_provider = "openai" if "github-models" in provider_name else "github-models"
fallback_resolved = _get_llm_client(model=model, provider=fallback_provider)
if fallback_resolved is not None:
fallback_client, fallback_provider_name = fallback_resolved
try:
response = fallback_client.invoke(prompt)
content = getattr(response, "content", None) or str(response)
result = _parse_llm_response(content, fallback_provider_name)
# Add note about fallback
if result.summary:
result = EvaluationResult(
verdict=result.verdict,
scores=result.scores,
concerns=result.concerns,
summary=result.summary,
provider_used=fallback_provider_name,
model=result.model,
used_llm=result.used_llm,
error=f"Primary provider ({provider_name}) failed, used fallback",
raw_content=result.raw_content,
)
return result
except Exception as fallback_exc:
return _fallback_evaluation(
f"Primary ({provider_name}): {exc}; Fallback ({fallback_provider_name}): {fallback_exc}"
)
Comment on lines +497 to +522
Copy link

Copilot AI Jan 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fallback logic has a bug: when provider is None (auto mode), the code checks whether "github-models" appears in provider_name to determine the fallback provider. However, if provider_name is "openai/gpt-4o", the fallback would be "github-models", which could fail with the same auth error. The logic should check the actual provider_name value to avoid circular fallback attempts. Note that the current expression, fallback_provider = "openai" if "github-models" in provider_name else "github-models", can still fail when OpenAI was the primary provider and GitHub Models lacks permission.

Suggested change
fallback_provider = "openai" if "github-models" in provider_name else "github-models"
fallback_resolved = _get_llm_client(model=model, provider=fallback_provider)
if fallback_resolved is not None:
fallback_client, fallback_provider_name = fallback_resolved
try:
response = fallback_client.invoke(prompt)
content = getattr(response, "content", None) or str(response)
result = _parse_llm_response(content, fallback_provider_name)
# Add note about fallback
if result.summary:
result = EvaluationResult(
verdict=result.verdict,
scores=result.scores,
concerns=result.concerns,
summary=result.summary,
provider_used=fallback_provider_name,
model=result.model,
used_llm=result.used_llm,
error=f"Primary provider ({provider_name}) failed, used fallback",
raw_content=result.raw_content,
)
return result
except Exception as fallback_exc:
return _fallback_evaluation(
f"Primary ({provider_name}): {exc}; Fallback ({fallback_provider_name}): {fallback_exc}"
)
# Determine the base provider from the resolved provider name and choose the opposite
base_provider = provider_name.split("/", 1)[0] if provider_name else ""
if base_provider == "github-models":
fallback_provider = "openai"
elif base_provider == "openai":
fallback_provider = "github-models"
else:
fallback_provider = None
if fallback_provider is not None and fallback_provider != base_provider:
fallback_resolved = _get_llm_client(model=model, provider=fallback_provider)
if fallback_resolved is not None:
fallback_client, fallback_provider_name = fallback_resolved
try:
response = fallback_client.invoke(prompt)
content = getattr(response, "content", None) or str(response)
result = _parse_llm_response(content, fallback_provider_name)
# Add note about fallback
if result.summary:
result = EvaluationResult(
verdict=result.verdict,
scores=result.scores,
concerns=result.concerns,
summary=result.summary,
provider_used=fallback_provider_name,
model=result.model,
used_llm=result.used_llm,
error=f"Primary provider ({provider_name}) failed, used fallback",
raw_content=result.raw_content,
)
return result
except Exception as fallback_exc:
return _fallback_evaluation(
f"Primary ({provider_name}): {exc}; Fallback ({fallback_provider_name}): {fallback_exc}"
)

Copilot uses AI. Check for mistakes.
return _fallback_evaluation(f"LLM invocation failed: {exc}")

content = getattr(response, "content", None) or str(response)
Expand Down
265 changes: 265 additions & 0 deletions templates/consumer-repo/.github/workflows/agents-auto-label.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
name: Auto-Label Issues

# Suggests or applies labels to newly opened or edited issues based on
# semantic matching.
# Uses label_matcher.py for embedding-based similarity

# Triggered whenever an issue is opened or its title/body is edited
on:
issues:
types: [opened, edited]

# issues: write is needed by the steps that add labels and create/update
# the suggestion comment on the issue
permissions:
contents: read
issues: write

# Both thresholds are strings here; the matching script converts them with float()
env:
# Threshold for auto-applying labels (very high confidence)
AUTO_APPLY_THRESHOLD: "0.90"
# Threshold for suggesting labels (lower, for comments)
SUGGEST_THRESHOLD: "0.75"

jobs:
auto-label:
runs-on: ubuntu-latest
# Skip if issue already has agent-related labels
if: |
!contains(github.event.issue.labels.*.name, 'agents:formatted') &&
!contains(github.event.issue.labels.*.name, 'agent:codex') &&
!contains(github.event.issue.labels.*.name, 'automated')

steps:
# The matcher code lives in stranske/Workflows; it is checked out into
# ./workflows-repo, which later steps cd into
- name: Checkout Workflows repo
uses: actions/checkout@v6
with:
repository: stranske/Workflows
path: workflows-repo

Comment on lines +33 to +35
Copy link

Copilot AI Jan 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The repository value uses a conditional that will always resolve to 'stranske/Workflows'. When github.repository == 'stranske/Workflows', it returns github.repository (which is 'stranske/Workflows'). When the condition is false, it returns 'stranske/Workflows'. This makes the conditional redundant. Simply use repository: stranske/Workflows for clarity.

Copilot uses AI. Check for mistakes.
# Python 3.12 runtime for the label-matching script run in a later step
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"

# Editable install of the checked-out workflows-repo with its "langchain" extra
- name: Install dependencies
run: |
cd workflows-repo
pip install -e ".[langchain]" --quiet

- name: Get repo labels
  id: get-labels
  uses: actions/github-script@v8
  with:
    script: |
      // Collect every label defined on this repository. paginate() walks all
      // result pages, so repos with more than 100 labels are fully covered.
      const { owner, repo } = context.repo;
      const labels = await github.paginate(github.rest.issues.listLabelsForRepo, {
        owner,
        repo,
        per_page: 100,
      });

      // Keep only name + description; a missing description becomes ''.
      const labelData = labels.map(({ name, description }) => ({
        name,
        description: description || '',
      }));

      // Exposed to later steps as steps.get-labels.outputs.labels_json
      core.setOutput('labels_json', JSON.stringify(labelData));
      core.info(`Found ${labels.length} labels in repo`);

- name: Match labels
  id: match
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
    LABELS_JSON: ${{ steps.get-labels.outputs.labels_json }}
    ISSUE_TITLE: ${{ github.event.issue.title }}
    ISSUE_BODY: ${{ github.event.issue.body }}
  run: |
    cd workflows-repo
    python3 << 'PYTHON_SCRIPT'
    import json
    import os
    import sys

    # Make the current directory (the checked-out repo) importable
    sys.path.insert(0, '.')

    from scripts.langchain.label_matcher import (
        build_label_vector_store,
        find_similar_labels,
        LabelRecord,
    )


    def write_outputs(*lines):
        """Append the given pre-formatted lines to the step's GITHUB_OUTPUT file."""
        with open(os.environ['GITHUB_OUTPUT'], 'a') as out:
            for line in lines:
                out.write(line)


    def finish_without_suggestions(reason):
        """Print why nothing was matched, record has_suggestions=false, and exit 0."""
        print(reason)
        write_outputs('has_suggestions=false\n')
        sys.exit(0)


    env = os.environ

    # Query text: issue title and body separated by a blank line
    issue_title = env.get('ISSUE_TITLE', '')
    issue_body = env.get('ISSUE_BODY', '')
    query = "\n\n".join([issue_title, issue_body])

    # Confidence cut-offs (workflow-level env, with the same defaults)
    auto_threshold = float(env.get('AUTO_APPLY_THRESHOLD', '0.90'))
    suggest_threshold = float(env.get('SUGGEST_THRESHOLD', '0.75'))

    # Labels gathered by the previous step, as a JSON list of {name, description}
    repo_labels = json.loads(env.get('LABELS_JSON', '[]'))
    if not repo_labels:
        finish_without_suggestions("No labels found in repo")

    # Build the vector store from one LabelRecord per repo label
    store = build_label_vector_store(
        [
            LabelRecord(name=entry['name'], description=entry['description'])
            for entry in repo_labels
        ]
    )
    if store is None:
        finish_without_suggestions("Could not build label vector store (missing embeddings)")

    # Up to 5 candidates at or above the suggestion threshold
    matches = find_similar_labels(store, query, threshold=suggest_threshold, k=5)
    if not matches:
        finish_without_suggestions("No label matches found above threshold")

    # Split candidates: at/above auto_threshold are applied, the band below is suggested
    auto_apply = [hit for hit in matches if hit.score >= auto_threshold]
    suggestions = [
        hit for hit in matches
        if suggest_threshold <= hit.score < auto_threshold
    ]

    print(f"Auto-apply labels ({auto_threshold}+ confidence):")
    for hit in auto_apply:
        print(f" - {hit.label.name}: {hit.score:.2%}")

    print(f"Suggested labels ({suggest_threshold}-{auto_threshold} confidence):")
    for hit in suggestions:
        print(f" - {hit.label.name}: {hit.score:.2%}")

    # Hand both lists to later steps as single-line JSON step outputs
    auto_labels = json.dumps([hit.label.name for hit in auto_apply])
    suggest_json = json.dumps(
        [{'name': hit.label.name, 'score': f'{hit.score:.0%}'} for hit in suggestions]
    )
    write_outputs(
        'has_suggestions=true\n',
        f'auto_apply_labels={auto_labels}\n',
        f'suggested_labels={suggest_json}\n',
    )

    PYTHON_SCRIPT

- name: Apply high-confidence labels
  if: |
    steps.match.outputs.has_suggestions == 'true' &&
    steps.match.outputs.auto_apply_labels != '[]'
  uses: actions/github-script@v8
  env:
    # Passed via the environment instead of being interpolated into the script
    # source: a label name containing a quote or backslash would otherwise
    # break the JS string literal (or inject code into it).
    AUTO_APPLY_LABELS: ${{ steps.match.outputs.auto_apply_labels }}
  with:
    script: |
      // JSON array of label names produced by the "Match labels" step
      const autoApplyLabels = JSON.parse(process.env.AUTO_APPLY_LABELS || '[]');

      if (autoApplyLabels.length === 0) {
        core.info('No labels to auto-apply');
        return;
      }

      // Get current labels
      const { data: issue } = await github.rest.issues.get({
        owner: context.repo.owner,
        repo: context.repo.repo,
        issue_number: context.issue.number
      });

      // Only add labels the issue does not already carry
      const currentLabels = issue.labels.map(l => l.name);
      const newLabels = autoApplyLabels.filter(l => !currentLabels.includes(l));

      if (newLabels.length === 0) {
        core.info('All suggested labels already present');
        return;
      }

      // Add labels
      await github.rest.issues.addLabels({
        owner: context.repo.owner,
        repo: context.repo.repo,
        issue_number: context.issue.number,
        labels: newLabels
      });

      core.info(`Applied labels: ${newLabels.join(', ')}`);

- name: Post suggestion comment
  if: |
    steps.match.outputs.has_suggestions == 'true' &&
    steps.match.outputs.suggested_labels != '[]'
  uses: actions/github-script@v8
  env:
    # Step outputs are handed over through the environment rather than being
    # interpolated into the script source, so quotes or backslashes in label
    # names cannot break the JS string literal or inject code.
    SUGGESTED_LABELS: ${{ steps.match.outputs.suggested_labels }}
    AUTO_APPLY_LABELS: ${{ steps.match.outputs.auto_apply_labels }}
  with:
    script: |
      // suggestedLabels: [{ name, score }], autoApplied: [name, ...]
      const suggestedLabels = JSON.parse(process.env.SUGGESTED_LABELS || '[]');
      const autoApplied = JSON.parse(process.env.AUTO_APPLY_LABELS || '[]');

      if (suggestedLabels.length === 0) {
        core.info('No suggestions to post');
        return;
      }

      // Build suggestion list
      const suggestions = suggestedLabels
        .map(l => `- \`${l.name}\` (${l.score} confidence)`)
        .join('\n');

      let body = `### 🏷️ Label Suggestions\n\n`;
      body += `Based on the issue content, these labels might be relevant:\n\n`;
      body += `${suggestions}\n\n`;

      if (autoApplied.length > 0) {
        const appliedStr = autoApplied.map(l => `\`${l}\``).join(', ');
        body += `**Auto-applied:** ${appliedStr}\n\n`;
      }

      body += `<details>\n<summary>How to use these suggestions</summary>\n\n`;
      body += `- Click the label name in the sidebar to add it\n`;
      const ghCmd = `gh issue edit ${context.issue.number} --add-label "label-name"`;
      body += `- Or use the GitHub CLI: \`${ghCmd}\`\n`;
      body += `</details>\n\n`;
      body += `---\n*Auto-generated by label matcher*`;

      // Check for existing suggestion comment. Paginate so a previous bot
      // comment is still found on issues with many comments; otherwise every
      // edit of such an issue would post a duplicate.
      const comments = await github.paginate(github.rest.issues.listComments, {
        owner: context.repo.owner,
        repo: context.repo.repo,
        issue_number: context.issue.number,
        per_page: 100
      });

      // Guard against comments with no body or no user object
      const existingComment = comments.find(c =>
        (c.body || '').includes('### 🏷️ Label Suggestions') &&
        c.user?.type === 'Bot'
      );

      if (existingComment) {
        // Update existing comment
        await github.rest.issues.updateComment({
          owner: context.repo.owner,
          repo: context.repo.repo,
          comment_id: existingComment.id,
          body: body
        });
        core.info('Updated existing suggestion comment');
      } else {
        // Create new comment
        await github.rest.issues.createComment({
          owner: context.repo.owner,
          repo: context.repo.repo,
          issue_number: context.issue.number,
          body: body
        });
        core.info('Posted label suggestions');
      }
Loading
Loading