diff --git a/.github/agents/issue-fix-agent.agent.md b/.github/agents/issue-fix-agent.agent.md index 3e099c868d..a871dc8415 100644 --- a/.github/agents/issue-fix-agent.agent.md +++ b/.github/agents/issue-fix-agent.agent.md @@ -121,8 +121,6 @@ cosmos_livesite_mcp: - "Python 3.x with virtual environment" ``` -### 0.2 GitHub MCP Server - ### 0.2 GitHub Access (SAML-Protected Repos) **⚠️ IMPORTANT: Azure org repos require SAML SSO - GitHub MCP Server is BLOCKED.** @@ -2752,11 +2750,11 @@ branch_to_pr_url: | Serialization | @serialization-owners | @sdk-team | | Encryption | @encryption-owners | @security-team | -### 7.4 Remote CI Validation (Azure Pipelines Gates) +### 7.5 Remote CI Validation (Azure Pipelines Gates) **Critical: Local tests are not sufficient. All fixes must pass the full Azure Pipelines CI gates.** -#### 7.4.1 CI Pipeline Structure +#### 7.5.1 CI Pipeline Structure The repository uses Azure Pipelines with multiple gate templates defined in `azure-pipelines.yml`: @@ -2814,7 +2812,7 @@ ci_gates: - Thin client variant builds ``` -#### 7.4.2 Local vs Remote CI Comparison +#### 7.5.2 Local vs Remote CI Comparison | Validation Type | Local Testing | Remote CI (Azure Pipelines) | |----------------|---------------|----------------------------| @@ -2827,7 +2825,7 @@ ci_gates: | **Cross-Platform** | ⚠️ Single OS | ✅ Windows matrix | | **Performance Gates** | ⚠️ Variable hardware | ✅ Consistent CI agents | -#### 7.4.3 Pre-PR Validation Workflow +#### 7.5.3 Pre-PR Validation Workflow ```yaml validation_workflow: @@ -3011,7 +3009,7 @@ validation_workflow: exit_condition: "All CI gates green" ``` -#### 7.4.4 CI Failure Triage +#### 7.5.4 CI Failure Triage **⚠️ CRITICAL: After fixing any CI failure, FULL local validation is required BEFORE pushing.** @@ -3115,7 +3113,7 @@ common_ci_failures: - Check if failure is in multi-region specific code ``` -#### 7.4.5 CI Gate Checklist for PR +#### 7.5.5 CI Gate Checklist for PR ```markdown ## CI Validation Checklist @@ 
-3148,7 +3146,7 @@ common_ci_failures: - [ ] PR ready for review (not draft) ``` -#### 7.4.6 Monitoring Tools +#### 7.5.6 Monitoring Tools ```yaml ci_monitoring_tools: @@ -3185,7 +3183,7 @@ ci_monitoring_tools: tail_lines: 500 ``` -#### 7.4.7 Accessing Azure DevOps CI Logs +#### 7.5.7 Accessing Azure DevOps CI Logs **Use Azure DevOps REST API with PAT (see Section 0.3.1 for setup).** @@ -4137,12 +4135,27 @@ flaky_tests: seen_in: ["PR #5573 (merged)", "PR #5583", "Build 59156"] action: "Retry failed stage via MCP server" - # Add more as discovered: - # TestName: - # location: "path/to/test" - # symptom: "description" - # seen_in: ["PR numbers where it failed"] - # action: "retry/skip/fix" + RetryTransientIssuesTestAsync: + location: "Microsoft.Azure.Cosmos.EmulatorTests" + symptom: "Transient failure during emulator tests" + seen_in: ["PR #5588", "PR #5587"] + action: "Retry failed stage via ADO API" + + EmulatorTests_Release_Flaky: + location: "CI stage: EmulatorTests Release Flaky" + symptom: "Various timing-related failures" + seen_in: ["PR #5588 (Build 59213)"] + action: "Retry failed stage via ADO API" + +pre_existing_failures: + description: "Tests that consistently fail in master (not flaky)" + note: "These are baseline issues, not caused by your PR changes" + tests: + - name: "TestQueryExecutionInfo_FromString" + location: "OptimisticDirectExecutionQueryBaselineTests.cs" + - name: "TestTextDistributionPlanParsingFromStream" + location: "OptimisticDirectExecutionQueryBaselineTests.cs" + handling: "Ignore these failures only after confirming the same tests also fail on the master branch" handling_flaky_failures: step_1: "Check if failed test is in flaky registry" @@ -4857,7 +4870,9 @@ technical details. --- -### 16.12 Investigation Document Template +## 18. 
Reference Templates & Patterns + +### 18.1 Investigation Document Template **Create investigation docs in session workspace for complex issues:** @@ -4911,7 +4926,7 @@ investigation_document: - "When context compaction is likely" ``` -### 16.13 Commit Message Format +### 18.2 Commit Message Format **Follow conventional commit format for this repository:** @@ -4963,7 +4978,7 @@ commit_format: Fixes #5547 ``` -### 16.14 PR Description Template +### 18.3 PR Description Template **Full investigation details for Copilot-authored PRs:** @@ -5047,7 +5062,7 @@ pr_title_format: - "Query: Refactors SQL generation for better readability" ``` -### 16.15 Code Style (StyleCop & EditorConfig) +### 18.4 Code Style (StyleCop & EditorConfig) **Repository uses StyleCop.Analyzers and .editorconfig for code style enforcement.** @@ -5099,7 +5114,7 @@ editorconfig: - [ ] 4-space indentation - [ ] CRLF line endings -### 16.16 Async/Await & CancellationToken Patterns +### 18.5 Async/Await & CancellationToken Patterns ```yaml async_patterns: @@ -5131,7 +5146,7 @@ async_patterns: - "Never create fire-and-forget tasks without error handling" ``` -### 16.17 Error Handling Patterns +### 18.6 Error Handling Patterns ```yaml exception_handling: @@ -5166,7 +5181,7 @@ exception_handling: "503 ServiceUnavailable": "Transient, retry with backoff" ``` -### 16.18 Logging Conventions +### 18.7 Logging Conventions ```yaml logging: @@ -5196,7 +5211,7 @@ logging: never: "Sensitive data, PII, keys" ``` -### 16.19 Breaking Change Detection +### 18.8 Breaking Change Detection ```yaml api_contracts: @@ -5236,7 +5251,7 @@ api_contracts: - "If breaking, document in PR and get explicit approval" ``` -### 16.20 Security Review Checklist +### 18.9 Security Review Checklist ```yaml security_review: @@ -5260,7 +5275,7 @@ security_review: - "[ ] Key rotation supported" ``` -### 16.21 Performance Considerations +### 18.10 Performance Considerations ```yaml performance: @@ -5297,7 +5312,7 @@ performance: - "Add new 
benchmarks to Benchmark project if needed" ``` -### 16.22 Rollback Strategy +### 18.11 Rollback Strategy ```yaml rollback: @@ -5317,7 +5332,7 @@ rollback: - "Monitor after merge for 24h" ``` -### 16.23 Testing Patterns +### 18.12 Testing Patterns ```yaml testing: @@ -5351,7 +5366,7 @@ testing: update: "Run UpdateContracts.ps1 to refresh baselines" ``` -### 16.24 Dynamic .NET Version Testing +### 18.13 Dynamic .NET Version Testing **For issues that only reproduce on newer .NET versions (e.g., .NET 10):** @@ -5435,7 +5450,7 @@ dynamic_version_testing: - "Simple unit test is sufficient" ``` -### 16.25 Network and Region Debugging Techniques +### 18.14 Network and Region Debugging Techniques **Useful techniques for investigating connectivity and region-related issues:** @@ -5505,7 +5520,7 @@ network_debugging: --- -### 16.26 GitHub Comment Attribution +### 18.15 GitHub Comment Attribution **All GitHub comments posted by Copilot agents must include attribution:** @@ -5534,7 +5549,7 @@ github_comment_attribution: - "Builds trust with community" ``` -### 16.27 Awaiting Customer Response Workflow +### 18.16 Awaiting Customer Response Workflow **When investigation requires customer input before proceeding:** @@ -5596,7 +5611,7 @@ awaiting_customer_workflow: - "Track multiple awaiting issues in session plan" ``` -### 16.28 Issue Status Tracking in Session +### 18.17 Issue Status Tracking in Session **Maintain a status table when working multiple issues:** @@ -5630,6 +5645,106 @@ session_status_table: location: "Session plan.md in session-state folder" ``` +### 18.18 GitHub CLI Workarounds + +**Known issues with `gh` CLI and workarounds:** + +```yaml +gh_cli_workarounds: + gh_pr_edit_body_unreliable: + problem: "`gh pr edit --body-file` sometimes fails and leaves the PR body unchanged" + symptom: "Exit code 1 with deprecation warning, body not updated" + workaround: | + # Use gh api directly instead: + gh api repos/{owner}/{repo}/pulls/{pr_number} -X PATCH -F body=@body.md + example: | + gh api 
repos/Azure/azure-cosmos-dotnet-v3/pulls/5597 -X PATCH -F body=@pr_body.md + + gh_pr_edit_title_unreliable: + problem: "`gh pr edit --title` may fail with GraphQL warning" + workaround: | + gh api repos/{owner}/{repo}/pulls/{pr_number} -X PATCH -f title="New Title" +``` + +### 18.19 Sequence Diagrams in PRs + +**Add Mermaid sequence diagrams to help reviewers understand code flow:** + +```yaml +sequence_diagrams: + when_to_add: + - "Race conditions or timing-sensitive code" + - "Multi-component interactions (client → handler → service)" + - "Before/after behavior changes" + - "Error propagation paths" + + format: | + ```mermaid + sequenceDiagram + participant A as Component A + participant B as Component B + + A->>B: Method call + B-->>A: Return value + A--xB: Exception thrown + ``` + + example_before_after: | + ## Sequence Diagram + + ### Before Fix + ```mermaid + sequenceDiagram + App->>Client: Request() + Client--xApp: ❌ Confusing error + ``` + + ### After Fix + ```mermaid + sequenceDiagram + App->>Client: Request() + Client--xApp: ❌ Clear ObjectDisposedException + ``` + + benefits: + - "Reviewers understand flow at a glance" + - "Documents race condition scenarios" + - "Renders natively in GitHub markdown" +``` + +### 18.20 Review Feedback Workflow + +**Handling PR review comments efficiently:** + +```yaml +review_feedback_workflow: + on_comment_received: + step_1: "Read and understand the feedback" + step_2: "Implement the fix locally" + step_3: "Build and test" + step_4: "Commit with descriptive message" + step_5: "Push to PR branch" + step_6: "Reply to comment with commit SHA" + + reply_format: | + Good catch! Fixed in commit {sha}. 
+ + **Change:** {brief description of what changed} + + multiple_comments: + approach: "Address all comments in one commit if related" + separate_if: "Comments are unrelated or complex" + + disagreeing_with_feedback: + approach: "Reply explaining your reasoning" + be_open: "Consider reviewer's perspective" + escalate_if: "Fundamental design disagreement" + + requesting_re_review: + after: "All comments addressed and pushed" + command: "gh pr edit {number} --add-reviewer {reviewer}" # NOTE: may hit the same `gh pr edit` failures described in 18.18; if so, use: gh api repos/{owner}/{repo}/pulls/{number}/requested_reviewers -X POST -f "reviewers[]={reviewer}" +``` + --- ## TODO: Implementation Tasks