From 98f22f8025e5f5f218a4a588777ba72d4add30fc Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sun, 5 Oct 2025 20:16:26 -0400 Subject: [PATCH 01/21] create github action to automate codeowner change --- .github/workflows/update-codeowners.yml | 99 +++++++++++++++++++++++++ scripts/codeowner_analyzer.py | 14 +++- 2 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/update-codeowners.yml diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml new file mode 100644 index 0000000000..d7ae9fba8c --- /dev/null +++ b/.github/workflows/update-codeowners.yml @@ -0,0 +1,99 @@ +name: Update CODEOWNERS + +on: + schedule: + # Run weekly on Monday at 00:00 UTC + - cron: '0 0 * * 1' + workflow_dispatch: # Allow manual triggering + +permissions: + contents: write + pull-requests: write + +jobs: + update-codeowners: + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repository + uses: actions/checkout@v4.2.2 + with: + fetch-depth: 0 # Fetch full history for accurate analysis + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Run CODEOWNERS analyzer + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + python scripts/codeowner_analyzer.py \ + --output .github/CODEOWNERS \ + --depth 3 \ + --min-commits 2 \ + --days-back 365 + + - name: Check for changes + id: check_changes + run: | + if git diff --quiet .github/CODEOWNERS; then + echo "changed=false" >> $GITHUB_OUTPUT + echo "No changes detected in CODEOWNERS" + else + echo "changed=true" >> $GITHUB_OUTPUT + echo "Changes detected in CODEOWNERS" + fi + + - name: Create Pull Request + if: steps.check_changes.outputs.changed == 'true' + uses: peter-evans/create-pull-request@v7 + with: + token: ${{ secrets.GITHUB_TOKEN }} + commit-message: | + chore: update CODEOWNERS based on git history + + Auto-generated CODEOWNERS update based on commit activity over the last 365 days. + + 🤖 Generated with [Claude Code](https://claude.com/claude-code) + + Co-Authored-By: Claude + branch: auto-update-codeowners + delete-branch: true + title: 'chore: Update CODEOWNERS' + body: | + ## Summary + + This PR updates the CODEOWNERS file based on git commit history analysis from the last 365 days. + + ## Changes + + - Updated `.github/CODEOWNERS` with current code ownership based on: + - Commit frequency + - File coverage + - Commit recency + + ## How to Review + + 1. Review the changes to `.github/CODEOWNERS` + 2. Verify that the assigned owners are appropriate for each module + 3. Make manual adjustments if needed before merging + + ## Notes + + - This is an automated PR generated weekly + - Minimum commits threshold: 2 + - Analysis period: 365 days + - Directory depth: 3 levels + + --- + + 🤖 This PR was automatically generated by the [update-codeowners workflow](.github/workflows/update-codeowners.yml) + labels: | + automated + maintenance + assignees: | + + reviewers: | diff --git a/scripts/codeowner_analyzer.py b/scripts/codeowner_analyzer.py index 8018244fd2..c7c6190c7e 100644 --- a/scripts/codeowner_analyzer.py +++ b/scripts/codeowner_analyzer.py @@ -31,6 +31,7 @@ def __init__( github_token: Optional[str] = None, use_api: bool = True, allowed_users: Optional[List[str]] = None, + max_depth: int = 3, ): """ Initialize the code owners analyzer. @@ -43,10 +44,12 @@ def __init__( github_token: Optional GitHub API token for higher rate limits use_api: Whether to use GitHub API for email lookups (default: True) allowed_users: Optional list of GitHub usernames to include (filters out others) + max_depth: Maximum directory depth for module detection (default: 3) """ self.repo_path = Path(repo_path).resolve() self.min_commits = min_commits self.days_back = days_back + self.max_depth = max_depth self.module_owners: DefaultDict[str, DefaultDict[str, int]] = defaultdict( lambda: defaultdict(int) ) @@ -439,8 +442,10 @@ def get_modules(self) -> List[str]: if file_ext in relevant_extensions: # Add the directory and all parent directories as modules + # Limited by max_depth path_parts = Path(dir_path).parts - for i in range(1, len(path_parts) + 1): + max_parts = min(len(path_parts), self.max_depth) + for i in range(1, max_parts + 1): module = "/".join(path_parts[:i]) if not self.should_exclude(module): modules.add(module) @@ -773,6 +778,12 @@ def main() -> int: "--allowed-users-file", help="File containing allowed GitHub usernames, one per line", ) + parser.add_argument( + "--depth", + type=int, + default=3, + help="Maximum directory depth for module detection (default: 3)", + ) args = parser.parse_args() @@ -811,6 +822,7 @@ def main() -> int: github_token=args.github_token, use_api=not args.no_api, allowed_users=allowed_users, + max_depth=args.depth, ) except ValueError as e: print(f"Error: {e}", file=sys.stderr) From b5fb3db154d8c13036ac07571f563665c063b782 Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sun, 5 Oct 2025 20:31:05 -0400 Subject: [PATCH 02/21] upd --- .github/workflows/update-codeowners.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index d7ae9fba8c..fa6fb634d5 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -34,7 +34,8 @@ jobs: --output .github/CODEOWNERS \ --depth 3 \ --min-commits 2 \ - --days-back 365 + --days-back 365 \ + --no-api - name: Check for changes id: check_changes From a87e632fefc1d919918a7d2fb5cc02a5c5367f74 Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sun, 5 Oct 2025 20:34:31 -0400 Subject: [PATCH 03/21] upd --- .github/workflows/update-codeowners.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index fa6fb634d5..86b1101ceb 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -5,6 +5,10 @@ on: # Run weekly on Monday at 00:00 UTC - cron: '0 0 * * 1' workflow_dispatch: # Allow manual triggering + # NOTE(Zihao): debugging only, remove later + pull_request: + branches: + - main permissions: contents: write From 4e22b6b3dbd80c38d6fbb03ea73339717c4704f7 Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sun, 5 Oct 2025 21:53:22 -0400 Subject: [PATCH 04/21] upd --- .github/workflows/update-codeowners.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index 86b1101ceb..9c500ccc2e 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -44,7 +44,11 @@ jobs: - name: Check for changes id: check_changes run: | - if git diff --quiet .github/CODEOWNERS; then + # Check if CODEOWNERS is empty or has changed + if [ ! -s .github/CODEOWNERS ]; then + echo "changed=true" >> $GITHUB_OUTPUT + echo "CODEOWNERS is empty or missing" + elif git diff --quiet .github/CODEOWNERS; then echo "changed=false" >> $GITHUB_OUTPUT echo "No changes detected in CODEOWNERS" else From f8a1a8cfe71e6a4773a4b0793387e362499b74ec Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sun, 5 Oct 2025 21:57:11 -0400 Subject: [PATCH 05/21] upd --- .github/workflows/update-codeowners.yml | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index 9c500ccc2e..867781532b 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -44,16 +44,20 @@ jobs: - name: Check for changes id: check_changes run: | - # Check if CODEOWNERS is empty or has changed - if [ ! -s .github/CODEOWNERS ]; then - echo "changed=true" >> $GITHUB_OUTPUT - echo "CODEOWNERS is empty or missing" - elif git diff --quiet .github/CODEOWNERS; then - echo "changed=false" >> $GITHUB_OUTPUT - echo "No changes detected in CODEOWNERS" + # Check if CODEOWNERS file is new (unstaged) or has changes + if git ls-files --error-unmatch .github/CODEOWNERS >/dev/null 2>&1; then + # File is tracked, check for changes + if git diff --quiet .github/CODEOWNERS; then + echo "changed=false" >> $GITHUB_OUTPUT + echo "No changes detected in CODEOWNERS" + else + echo "changed=true" >> $GITHUB_OUTPUT + echo "Changes detected in CODEOWNERS" + fi else + # File is untracked (newly created) echo "changed=true" >> $GITHUB_OUTPUT - echo "Changes detected in CODEOWNERS" + echo "CODEOWNERS file is new" fi - name: Create Pull Request From d4df395c8b910af9de383e8fa6f2423671045e11 Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sun, 5 Oct 2025 22:03:03 -0400 Subject: [PATCH 06/21] upd --- .github/workflows/update-codeowners.yml | 5 +++-- scripts/codeowner_analyzer.py | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index 867781532b..329c907299 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -37,8 +37,9 @@ jobs: python scripts/codeowner_analyzer.py \ --output .github/CODEOWNERS \ --depth 3 \ - --min-commits 2 \ - --days-back 365 \ + --min-commits 1 \ + --days-back 180 \ + --top-n 5 \ --no-api - name: Check for changes diff --git a/scripts/codeowner_analyzer.py b/scripts/codeowner_analyzer.py index c7c6190c7e..b18ec0a807 100644 --- a/scripts/codeowner_analyzer.py +++ b/scripts/codeowner_analyzer.py @@ -32,6 +32,7 @@ def __init__( use_api: bool = True, allowed_users: Optional[List[str]] = None, max_depth: int = 3, + top_n_owners: int = 3, ): """ Initialize the code owners analyzer. @@ -45,11 +46,13 @@ def __init__( use_api: Whether to use GitHub API for email lookups (default: True) allowed_users: Optional list of GitHub usernames to include (filters out others) max_depth: Maximum directory depth for module detection (default: 3) + top_n_owners: Number of top owners to include in CODEOWNERS file (default: 3) """ self.repo_path = Path(repo_path).resolve() self.min_commits = min_commits self.days_back = days_back self.max_depth = max_depth + self.top_n_owners = top_n_owners self.module_owners: DefaultDict[str, DefaultDict[str, int]] = defaultdict( lambda: defaultdict(int) ) @@ -659,10 +662,10 @@ def generate_codeowners_file( for module, data in results.items(): if data["owners"]: - # Take top 3 owners or those with ownership score > 0.1 + # Take top N owners or those with ownership score > 0.1 top_owners = [ owner - for owner in data["owners"][:3] + for owner in data["owners"][: self.top_n_owners] if owner["ownership_score"] > 0.1 ] @@ -784,6 +787,12 @@ def main() -> int: default=3, help="Maximum directory depth for module detection (default: 3)", ) + parser.add_argument( + "--top-n", + type=int, + default=3, + help="Number of top owners to include in CODEOWNERS file (default: 3)", + ) args = parser.parse_args() @@ -823,6 +832,7 @@ def main() -> int: use_api=not args.no_api, allowed_users=allowed_users, max_depth=args.depth, + top_n_owners=args.top_n, ) except ValueError as e: print(f"Error: {e}", file=sys.stderr) From 757cb326ebdac5d50c2ff3801bd0c79ec84c36b8 Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sun, 5 Oct 2025 22:05:29 -0400 Subject: [PATCH 07/21] upd --- .github/workflows/update-codeowners.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index 329c907299..66520d49a1 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -75,6 +75,7 @@ jobs: Co-Authored-By: Claude branch: auto-update-codeowners + base: main delete-branch: true title: 'chore: Update CODEOWNERS' body: | From 3934511289a8155772762407722ca1375b5dcf4a Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sun, 5 Oct 2025 22:08:42 -0400 Subject: [PATCH 08/21] upd --- .github/workflows/update-codeowners.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index 66520d49a1..b4a0db6827 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -23,7 +23,7 @@ jobs: uses: actions/checkout@v4.2.2 with: fetch-depth: 0 # Fetch full history for accurate analysis - token: ${{ secrets.GITHUB_TOKEN }} + token: ${{ secrets.FLASHINFER_GITHUB_TOKEN }} - name: Set up Python uses: actions/setup-python@v5 @@ -32,7 +32,7 @@ jobs: - name: Run CODEOWNERS analyzer env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.FLASHINFER_GITHUB_TOKEN }} run: | python scripts/codeowner_analyzer.py \ --output .github/CODEOWNERS \ @@ -65,7 +65,7 @@ jobs: if: steps.check_changes.outputs.changed == 'true' uses: peter-evans/create-pull-request@v7 with: - token: ${{ secrets.GITHUB_TOKEN }} + token: ${{ secrets.FLASHINFER_GITHUB_TOKEN }} commit-message: | chore: update CODEOWNERS based on git history From 560b43497b58dd305a3c59fefed4504ea80b4f7c Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sun, 5 Oct 2025 22:11:18 -0400 Subject: [PATCH 09/21] Revert "upd" This reverts commit 3934511289a8155772762407722ca1375b5dcf4a. --- .github/workflows/update-codeowners.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index b4a0db6827..66520d49a1 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -23,7 +23,7 @@ jobs: uses: actions/checkout@v4.2.2 with: fetch-depth: 0 # Fetch full history for accurate analysis - token: ${{ secrets.FLASHINFER_GITHUB_TOKEN }} + token: ${{ secrets.GITHUB_TOKEN }} - name: Set up Python uses: actions/setup-python@v5 @@ -32,7 +32,7 @@ jobs: - name: Run CODEOWNERS analyzer env: - GITHUB_TOKEN: ${{ secrets.FLASHINFER_GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | python scripts/codeowner_analyzer.py \ --output .github/CODEOWNERS \ @@ -65,7 +65,7 @@ jobs: if: steps.check_changes.outputs.changed == 'true' uses: peter-evans/create-pull-request@v7 with: - token: ${{ secrets.FLASHINFER_GITHUB_TOKEN }} + token: ${{ secrets.GITHUB_TOKEN }} commit-message: | chore: update CODEOWNERS based on git history From 130b8b317d5d99ccf4c4964d0ef316d95d094ac2 Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sun, 5 Oct 2025 22:18:40 -0400 Subject: [PATCH 10/21] upd --- .github/workflows/update-codeowners.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index 66520d49a1..2a7dfcba9a 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -65,7 +65,7 @@ jobs: if: steps.check_changes.outputs.changed == 'true' uses: peter-evans/create-pull-request@v7 with: - token: ${{ secrets.GITHUB_TOKEN }} + token: ${{ secrets.FLASHINFER_GITHUB_TOKEN }} commit-message: | chore: update CODEOWNERS based on git history From 3b5e0aa7f951dcc49b603a9785aa5069a6809763 Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sun, 5 Oct 2025 22:25:53 -0400 Subject: [PATCH 11/21] upd --- .github/workflows/update-codeowners.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index 2a7dfcba9a..cfe836ac35 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -6,7 +6,7 @@ on: - cron: '0 0 * * 1' workflow_dispatch: # Allow manual triggering # NOTE(Zihao): debugging only, remove later - pull_request: + pull_request_target: branches: - main @@ -65,7 +65,7 @@ jobs: if: steps.check_changes.outputs.changed == 'true' uses: peter-evans/create-pull-request@v7 with: - token: ${{ secrets.FLASHINFER_GITHUB_TOKEN }} + token: ${{ secrets.GITHUB_TOKEN }} commit-message: | chore: update CODEOWNERS based on git history From c4027303a1a393cb729ae43757c5aa066f3d72cf Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sun, 5 Oct 2025 22:29:16 -0400 Subject: [PATCH 12/21] test: switch to pull_request with FLASHINFER_GITHUB_TOKEN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Testing if the token has permission to create PRs from pull_request trigger. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .github/workflows/update-codeowners.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index cfe836ac35..2a7dfcba9a 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -6,7 +6,7 @@ on: - cron: '0 0 * * 1' workflow_dispatch: # Allow manual triggering # NOTE(Zihao): debugging only, remove later - pull_request_target: + pull_request: branches: - main @@ -65,7 +65,7 @@ jobs: if: steps.check_changes.outputs.changed == 'true' uses: peter-evans/create-pull-request@v7 with: - token: ${{ secrets.GITHUB_TOKEN }} + token: ${{ secrets.FLASHINFER_GITHUB_TOKEN }} commit-message: | chore: update CODEOWNERS based on git history From f0385496efd46cdb49a0d7668f7cd9c14df09c39 Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sun, 5 Oct 2025 22:40:04 -0400 Subject: [PATCH 13/21] chore: remove pull_request trigger from update-codeowners workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use workflow_dispatch for testing instead to ensure secrets are accessible. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .github/workflows/update-codeowners.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index 2a7dfcba9a..a48c2f61f2 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -5,10 +5,6 @@ on: # Run weekly on Monday at 00:00 UTC - cron: '0 0 * * 1' workflow_dispatch: # Allow manual triggering - # NOTE(Zihao): debugging only, remove later - pull_request: - branches: - - main permissions: contents: write @@ -65,7 +61,7 @@ jobs: if: steps.check_changes.outputs.changed == 'true' uses: peter-evans/create-pull-request@v7 with: - token: ${{ secrets.FLASHINFER_GITHUB_TOKEN }} + token: ${{ secrets.GITHUB_TOKEN }} commit-message: | chore: update CODEOWNERS based on git history From 9cc7e985e8ab214559ef7fd53818473e246e511f Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sun, 5 Oct 2025 22:53:02 -0400 Subject: [PATCH 14/21] upd --- .github/workflows/update-codeowners.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index a48c2f61f2..0b3f2f04a0 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -5,6 +5,9 @@ on: # Run weekly on Monday at 00:00 UTC - cron: '0 0 * * 1' workflow_dispatch: # Allow manual triggering + pull_request: + branches: + - main permissions: contents: write From 75476931c7dd0c59222cdfdb6ab95cd08451abd7 Mon Sep 17 00:00:00 2001 From: yzh119 Date: Mon, 6 Oct 2025 10:09:42 -0700 Subject: [PATCH 15/21] disable triggering from pull request --- .github/workflows/update-codeowners.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index 0b3f2f04a0..a48c2f61f2 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -5,9 +5,6 @@ on: # Run weekly on Monday at 00:00 UTC - cron: '0 0 * * 1' workflow_dispatch: # Allow manual triggering - pull_request: - branches: - - main permissions: contents: write From 86c78edaa1bd7116238c9b65db68fe5da62efa0d Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Tue, 14 Oct 2025 12:56:59 -0400 Subject: [PATCH 16/21] updated script to use github cli --- scripts/codeowner_analyzer.py | 279 ++++++++-------------------------- 1 file changed, 63 insertions(+), 216 deletions(-) diff --git a/scripts/codeowner_analyzer.py b/scripts/codeowner_analyzer.py index b18ec0a807..86e4a675a2 100644 --- a/scripts/codeowner_analyzer.py +++ b/scripts/codeowner_analyzer.py @@ -14,11 +14,8 @@ from collections import defaultdict from datetime import datetime, timedelta from pathlib import Path -from typing import List, Dict, Optional, Tuple, Any, Set, DefaultDict +from typing import List, Dict, Optional, Tuple, Any, DefaultDict import re -import urllib.request -import urllib.error -import time class CodeOwnersAnalyzer: @@ -28,8 +25,6 @@ def __init__( min_commits: int = 2, days_back: int = 365, exclude_patterns: Optional[List[str]] = None, - github_token: Optional[str] = None, - use_api: bool = True, allowed_users: Optional[List[str]] = None, max_depth: int = 3, top_n_owners: int = 3, @@ -42,8 +37,6 @@ def __init__( min_commits: Minimum commits required to be considered an owner days_back: How many days back to analyze (default: 1 year) exclude_patterns: List of path patterns to exclude from analysis - github_token: Optional GitHub API token for higher rate limits - use_api: Whether to use GitHub API for email lookups (default: True) allowed_users: Optional list of GitHub usernames to include (filters out others) max_depth: Maximum directory depth for module detection (default: 3) top_n_owners: Number of top owners to include in CODEOWNERS file (default: 3) @@ -53,17 +46,9 @@ def __init__( self.days_back = days_back self.max_depth = max_depth self.top_n_owners = top_n_owners - self.module_owners: DefaultDict[str, DefaultDict[str, int]] = defaultdict( - lambda: defaultdict(int) - ) - self.module_files: DefaultDict[str, Set[str]] = defaultdict(set) self.email_to_github: Dict[ str, str ] = {} # Cache for email to GitHub username mappings - self.github_token = github_token or os.environ.get("GITHUB_TOKEN") - self.use_api = use_api - self.api_call_count = 0 - self.last_api_call_time: float = 0 # Convert allowed users to lowercase for case-insensitive comparison self.allowed_users = ( set(u.lower() for u in allowed_users) if allowed_users else None @@ -96,15 +81,28 @@ def __init__( if not (self.repo_path / ".git").exists(): raise ValueError(f"Not a git repository: {self.repo_path}") + # Check if gh CLI is available + try: + subprocess.run( + ["gh", "--version"], capture_output=True, check=True, timeout=5 + ) + except ( + subprocess.CalledProcessError, + FileNotFoundError, + subprocess.TimeoutExpired, + ) as e: + raise ValueError( + "GitHub CLI (gh) is not installed or not available in PATH.\n" + "Please install it from: https://cli.github.com/\n" + "Or use package manager: brew install gh / apt install gh / etc." + ) from e + def extract_github_username_from_email(self, email: str) -> Optional[str]: """ Extract GitHub username from email address. - Common patterns: - - username@users.noreply.github.com - - 12345+username@users.noreply.github.com - - username@github.com - - For other emails, try to use the local part as a potential username + For GitHub noreply emails, extract directly from email pattern. + For all other emails, use GitHub CLI to lookup the username. """ email = email.strip().lower() @@ -114,7 +112,7 @@ def extract_github_username_from_email(self, email: str) -> Optional[str]: username = None - # GitHub noreply email patterns + # GitHub noreply email patterns - can extract directly if "users.noreply.github.com" in email: # Pattern: username@users.noreply.github.com match = re.match(r"^([^@+]+)@users\.noreply\.github\.com$", email) @@ -125,40 +123,21 @@ def extract_github_username_from_email(self, email: str) -> Optional[str]: match = re.match(r"^\d+\+([^@]+)@users\.noreply\.github\.com$", email) if match: username = match.group(1) - - # GitHub.com email - elif "@github.com" in email: - match = re.match(r"^([^@]+)@github\.com$", email) - if match: - username = match.group(1) - - # For other emails, try multiple lookup methods else: - # First, try GitHub API lookup if enabled - if self.use_api: - username = self.lookup_github_username_via_api(email) - else: - username = None - - # If API lookup fails or is disabled, try to get from commit history - if not username: - username = self.lookup_github_username_from_commits(email) - - # If still not found, use local part of email as a last resort fallback - # but don't return it as a username (return None instead) - if not username: - # We don't want to guess usernames from email local parts - # as they're often incorrect - username = None + # For all other emails, use GitHub CLI to lookup + username = self.lookup_github_username_via_gh_cli(email) # Cache the result (including None to avoid repeated failed lookups) self.email_to_github[email] = username return username - def lookup_github_username_via_api(self, email: str) -> Optional[str]: + def lookup_github_username_via_gh_cli(self, email: str) -> Optional[str]: """ - Look up GitHub username via GitHub API search. + Look up GitHub username using the GitHub CLI tool (gh). + + This queries the GitHub repository commits to find commits by the given email + and extracts the author's GitHub login name. Args: email: Email address to search for @@ -166,170 +145,51 @@ def lookup_github_username_via_api(self, email: str) -> Optional[str]: Returns: GitHub username if found, None otherwise """ - # Rate limiting based on GitHub API limits: - # - Authenticated: 5000 requests/hour = 1.39 req/sec - # - Unauthenticated: 60 requests/hour = 1 req/60sec - current_time = time.time() - - if self.api_call_count > 0: - time_since_last = current_time - self.last_api_call_time + try: + # Extract repository owner and name from git remote + remote_url = self.run_git_command( + ["git", "config", "--get", "remote.origin.url"] + ) - if self.github_token: - # With token: 5000/hour = 0.72 seconds between calls (with buffer) - min_delay = 0.8 - else: - # Without token: 60/hour = 60 seconds between calls - min_delay = 60.1 - - if time_since_last < min_delay: - time.sleep(min_delay - time_since_last) - - retry_count = 0 - max_retries = 3 - base_delay = 1 - - while retry_count < max_retries: - try: - # Search for users by email - search_url = f"https://api.github.com/search/users?q={email}+in:email" - - req = urllib.request.Request(search_url) - req.add_header("Accept", "application/vnd.github.v3+json") - req.add_header("User-Agent", "flashinfer-codeowner-analyzer") - - if self.github_token: - req.add_header("Authorization", f"Bearer {self.github_token}") - - self.api_call_count += 1 - self.last_api_call_time = time.time() - - with urllib.request.urlopen(req, timeout=10) as response: - data = json.loads(response.read().decode()) - - if data.get("total_count", 0) > 0 and "items" in data: - # Return the first matching user's login - login = data["items"][0].get("login") - result = login if isinstance(login, str) else None - # Cache the API result - self.email_to_github[email] = result - return result - - # Cache the failed lookup to avoid retrying - self.email_to_github[email] = None - return None - - except urllib.error.HTTPError as e: - if e.code == 403: - # Rate limit exceeded - implement exponential backoff - retry_delay = base_delay * (2**retry_count) - if retry_count < max_retries - 1: - print( - f"GitHub API rate limit hit, retrying in {retry_delay}s... (attempt {retry_count + 1}/{max_retries})" - ) - time.sleep(retry_delay) - retry_count += 1 - continue - else: - print( - f"Warning: GitHub API rate limit exceeded for email lookup: {email}" - ) - # Cache the failed lookup to avoid retrying - self.email_to_github[email] = None - return None - elif e.code == 401: - print( - "Warning: GitHub API authentication failed. Check your token." - ) - # Cache the failed lookup to avoid retrying - self.email_to_github[email] = None - return None - else: - # Other HTTP errors - don't retry but report them - print( - f"Warning: GitHub API HTTP error for {email}: {e.code} {e.reason}" - ) - # Cache the failed lookup to avoid retrying - self.email_to_github[email] = None - return None - except (urllib.error.URLError, TimeoutError, json.JSONDecodeError) as e: - # Network or parsing errors - retry with backoff - retry_delay = base_delay * (2**retry_count) - retry_count += 1 - if retry_count < max_retries: - print( - f"GitHub API network error for {email}, retrying in {retry_delay}s... (attempt {retry_count}/{max_retries}): {type(e).__name__}" - ) - time.sleep(retry_delay) - continue - # Cache the failed lookup after all retries exhausted - print( - f"Warning: GitHub API lookup failed for {email} after {max_retries} retries: {type(e).__name__}: {e}" - ) - self.email_to_github[email] = None + if not remote_url: return None - except Exception as e: - # Any other errors - don't retry but report them - print( - f"Warning: Unexpected error during GitHub API lookup for {email}: {type(e).__name__}: {e}" - ) - # Cache the failed lookup to avoid retrying - self.email_to_github[email] = None + + # Parse GitHub repo from URL (supports both HTTPS and SSH formats) + # HTTPS: https://github.com/owner/repo.git + # SSH: git@github.com:owner/repo.git + repo_match = re.search( + r"github\.com[:/]([^/]+)/([^/\s]+?)(?:\.git)?$", remote_url + ) + if not repo_match: return None - # Cache the failed lookup after all retries exhausted - self.email_to_github[email] = None - return None + repo_owner = repo_match.group(1) + repo_name = repo_match.group(2) + repo_full = f"{repo_owner}/{repo_name}" - def lookup_github_username_from_commits(self, email: str) -> Optional[str]: - """ - Try to find GitHub username from commit metadata. + # Use gh CLI to search for commits by this author email + # Use author filter in URL query string + gh_command = [ + "gh", + "api", + f"repos/{repo_full}/commits?author={email}&per_page=1", + "--jq", + ".[0].author.login // empty", + ] - This looks for commits by this email that might have been made via GitHub - which often includes the username in the commit message or author field. - """ - # Look for recent commits by this author - command = [ - "git", - "log", - "--author", - email, - "--format=%an|%cn|%s", # author name, committer name, subject - "--max-count=10", - ] - output = self.run_git_command(command) + result = subprocess.run( + gh_command, capture_output=True, text=True, timeout=10 + ) - if not output: - return None + if result.returncode == 0 and result.stdout.strip(): + username = result.stdout.strip() + return username if username else None - # Check if any commits were made via GitHub (often have specific patterns) - for line in output.split("\n"): - if line.strip(): - parts = line.split("|") - if len(parts) >= 3: - # Check commit message for GitHub PR patterns - subject = parts[2] - # Pattern: "Merge pull request #123 from username/branch" - match = re.search(r"from ([^/\s]+)/", subject) - if match: - username = match.group(1) - # Cache the result - self.email_to_github[email] = username - return username - - # Pattern: "Co-authored-by: Name " - match = re.search(r"Co-authored-by:.*<([^>]+)>", subject) - if match and "users.noreply.github.com" in match.group(1): - username = self.extract_github_username_from_email( - match.group(1) - ) - if username: - # Cache the result - self.email_to_github[email] = username - return username + return None - # Cache the failed lookup - self.email_to_github[email] = None - return None + except (subprocess.TimeoutExpired, subprocess.CalledProcessError, Exception): + # If gh CLI fails or is not available, return None + return None def should_include_contributor(self, author_string: str) -> bool: """ @@ -734,8 +594,6 @@ def main() -> int: %(prog)s --exclude vendor/ deps/ # Exclude additional directories %(prog)s --days-back 180 # Analyze last 6 months %(prog)s --json-output owners.json # Export detailed JSON - %(prog)s --github-token TOKEN # Use GitHub API for email lookups - GITHUB_TOKEN=TOKEN %(prog)s # Or set via environment variable %(prog)s --allowed-users user1 user2 # Only include specific GitHub users %(prog)s --allowed-users-file team.txt # Load allowed users from file """, @@ -763,15 +621,6 @@ def main() -> int: default=[], help="Additional path patterns to exclude (e.g., vendor/ deps/)", ) - parser.add_argument( - "--github-token", - help="GitHub API token for email lookups (or set GITHUB_TOKEN env var)", - ) - parser.add_argument( - "--no-api", - action="store_true", - help="Disable GitHub API lookups for faster processing", - ) parser.add_argument( "--allowed-users", nargs="*", @@ -828,8 +677,6 @@ def main() -> int: min_commits=args.min_commits, days_back=args.days_back, exclude_patterns=args.exclude, - github_token=args.github_token, - use_api=not args.no_api, allowed_users=allowed_users, max_depth=args.depth, top_n_owners=args.top_n, From 363e3f066d7fec15fd3c7661e8b5c1ad9da85f64 Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Tue, 14 Oct 2025 13:14:16 -0400 Subject: [PATCH 17/21] upd --- .github/workflows/update-codeowners.yml | 12 +++++++----- scripts/authorized_codeowner.txt | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 scripts/authorized_codeowner.txt diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index a48c2f61f2..42010a6399 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -5,6 +5,7 @@ on: # Run weekly on Monday at 00:00 UTC - cron: '0 0 * * 1' workflow_dispatch: # Allow manual triggering + pull_request: permissions: contents: write @@ -36,7 +37,7 @@ jobs: --min-commits 1 \ --days-back 180 \ --top-n 5 \ - --no-api + --allowed-users-file scripts/authorized_codeowner.txt - name: Check for changes id: check_changes @@ -65,7 +66,7 @@ jobs: commit-message: | chore: update CODEOWNERS based on git history - Auto-generated CODEOWNERS update based on commit activity over the last 365 days. + Auto-generated CODEOWNERS update based on commit activity over the last 180 days. 🤖 Generated with [Claude Code](https://claude.com/claude-code) @@ -77,7 +78,7 @@ jobs: body: | ## Summary - This PR updates the CODEOWNERS file based on git commit history analysis from the last 365 days. + This PR updates the CODEOWNERS file based on git commit history analysis from the last 180 days. ## Changes @@ -95,9 +96,10 @@ jobs: ## Notes - This is an automated PR generated weekly - - Minimum commits threshold: 2 - - Analysis period: 365 days + - Minimum commits threshold: 1 + - Analysis period: 180 days - Directory depth: 3 levels + - Top N owners per module: 5 --- diff --git a/scripts/authorized_codeowner.txt b/scripts/authorized_codeowner.txt new file mode 100644 index 0000000000..a5d0ac361e --- /dev/null +++ b/scripts/authorized_codeowner.txt @@ -0,0 +1,21 @@ +aleozlx +Amir-19 +Anerudhan +azhurkevich +bkryu +cyx-6 +dierksen +IwakuraRein +joker-eph +kahyunnam +kaixih +nv-yunzheq +nvmbreughe +paul841029 +Quackens +sergachev +sunggg +ttyio +wenscarl +yongwww +yzh119 From 0961939541f1d0a17c02e21bee437579c3b50694 Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Tue, 14 Oct 2025 13:16:45 -0400 Subject: [PATCH 18/21] remove debugging --- .github/workflows/update-codeowners.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index 42010a6399..c14c5e6bb1 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -5,7 +5,6 @@ on: # Run weekly on Monday at 00:00 UTC - cron: '0 0 * * 1' workflow_dispatch: # Allow manual triggering - pull_request: permissions: contents: write From 823ce149c5d34b553c960621af70fc8a18e0b8db Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sat, 18 Oct 2025 02:13:14 -0400 Subject: [PATCH 19/21] use flashinfer-bot --- .github/workflows/update-codeowners.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index c14c5e6bb1..7c98f43e21 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -5,6 +5,7 @@ on: # Run weekly on Monday at 00:00 UTC - cron: '0 0 * * 1' workflow_dispatch: # Allow manual triggering + pull_request: permissions: contents: write @@ -61,7 +62,9 @@ jobs: if: steps.check_changes.outputs.changed == 'true' uses: peter-evans/create-pull-request@v7 with: - token: ${{ secrets.GITHUB_TOKEN }} + token: ${{ secrets.FLASHINFER_BOT_TOKEN }} + committer: flashinfer-bot + author: flashinfer-bot commit-message: | chore: update CODEOWNERS based on git history From 55bc90ec210e6387981fcd33611abcae96893f3b Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sat, 18 Oct 2025 02:50:21 -0400 Subject: [PATCH 20/21] upd --- .github/workflows/update-codeowners.yml | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index 7c98f43e21..7c9b40a9a0 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -15,6 +15,11 @@ jobs: update-codeowners: runs-on: ubuntu-latest timeout-minutes: 30 + env: + DEPTH: 3 + MIN_COMMITS: 1 + DAYS_BACK: 180 + TOP_N: 5 steps: - name: Checkout repository uses: actions/checkout@v4.2.2 @@ -33,10 +38,10 @@ jobs: run: | python scripts/codeowner_analyzer.py \ --output .github/CODEOWNERS \ - --depth 3 \ - --min-commits 1 \ - --days-back 180 \ - --top-n 5 \ + --depth ${{ env.DEPTH }} \ + --min-commits ${{ env.MIN_COMMITS }} \ + --days-back ${{ env.DAYS_BACK }} \ + --top-n ${{ env.TOP_N }} \ --allowed-users-file scripts/authorized_codeowner.txt - name: Check for changes @@ -68,7 +73,7 @@ jobs: commit-message: | chore: update CODEOWNERS based on git history - Auto-generated CODEOWNERS update based on commit activity over the last 180 days. + Auto-generated CODEOWNERS update based on commit activity over the last ${{ env.DAYS_BACK }} days. 🤖 Generated with [Claude Code](https://claude.com/claude-code) @@ -80,7 +85,7 @@ jobs: body: | ## Summary - This PR updates the CODEOWNERS file based on git commit history analysis from the last 180 days. + This PR updates the CODEOWNERS file based on git commit history analysis from the last ${{ env.DAYS_BACK }} days. ## Changes @@ -98,10 +103,10 @@ jobs: ## Notes - This is an automated PR generated weekly - - Minimum commits threshold: 1 - - Analysis period: 180 days - - Directory depth: 3 levels - - Top N owners per module: 5 + - Minimum commits threshold: ${{ env.MIN_COMMITS }} + - Analysis period: ${{ env.DAYS_BACK }} days + - Directory depth: ${{ env.DEPTH }} levels + - Top N owners per module: ${{ env.TOP_N }} --- From e2c4f7a112e476becc9c835572eec5a0e049eb13 Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Sat, 18 Oct 2025 02:52:11 -0400 Subject: [PATCH 21/21] remove the debug 'pull_request' --- .github/workflows/update-codeowners.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/update-codeowners.yml b/.github/workflows/update-codeowners.yml index 7c9b40a9a0..a1beed6339 100644 --- a/.github/workflows/update-codeowners.yml +++ b/.github/workflows/update-codeowners.yml @@ -5,7 +5,6 @@ on: # Run weekly on Monday at 00:00 UTC - cron: '0 0 * * 1' workflow_dispatch: # Allow manual triggering - pull_request: permissions: contents: write