flashinfer-ai · aleozlx · Mar 4, 2026 · Mar 4, 2026 · Mar 4, 2026 · coderabbitai
@@ -0,0 +1,64 @@
+name: PR Auto-Label
+
+on:
+  pull_request_target:
+    types: [opened, synchronize]
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  label:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Skip PRs older than 21 days
+        env:
+          PR_CREATED: ${{ github.event.pull_request.created_at }}
+        run: |
+          created=$(date -d "$PR_CREATED" +%s)
+          cutoff=$(date -d '21 days ago' +%s)
+          if [ "$created" -lt "$cutoff" ]; then
+            echo "PR older than 21 days, skipping"
+            exit 1
+          fi
+
+      - name: Sparse checkout overrides file from base branch
+        uses: actions/checkout@v4
+        with:
+          sparse-checkout: scripts/codeowner_overrides.json
+          sparse-checkout-cone-mode: false
+
+      - name: Apply labels based on changed files
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: |
+          # Fetch changed files in the PR
+          changed_files=$(gh api "repos/${{ github.repository }}/pulls/${PR_NUMBER}/files" \
+            --paginate --jq '.[].filename')
+
+          # Read override groups and apply matching labels
+          jq -c '.[]' scripts/codeowner_overrides.json | while read -r group; do
+            readarray -t labels < <(echo "$group" | jq -r '.labels[]')
+            readarray -t prefixes < <(echo "$group" | jq -r '.owners | keys[]')
+
+            match=false
+            for prefix in "${prefixes[@]}"; do
+              for file in $changed_files; do
+                case "$file" in
+                  "$prefix"*) match=true; break ;;
+                esac
+              done
+              if $match; then break; fi
+            done
+
+            if $match; then
+              for label in "${labels[@]}"; do
+                echo "Adding label: $label"
+                gh pr edit "$PR_NUMBER" \
+                  --repo "${{ github.repository }}" \
+                  --add-label "$label" || true
+              done
+            fi
+          done
@@ -18,6 +18,31 @@
 import re
 
 
+def load_overrides(filename: str) -> Dict[str, List[str]]:
+    """Load codeowner overrides from a grouped JSON array file.
+
+    Returns a flat dict mapping path prefixes to lists of owners.
+    """
+    with open(filename, "r") as f:
+        raw = json.load(f)
+
+    if not isinstance(raw, list):
+        raise ValueError("Overrides file must contain a JSON array")
+
+    overrides: Dict[str, List[str]] = {}
+    for group in raw:
+        if not isinstance(group, dict) or not isinstance(group.get("owners"), dict):
+            raise ValueError("Each group must be an object with an 'owners' dict")
+        for path, users in group["owners"].items():
+            if not isinstance(users, list) or not all(
+                isinstance(u, str) for u in users
+            ):
+                raise ValueError(f"Override for '{path}' must be a list of strings")
+            overrides[path.lstrip("./").rstrip("/")] = users
+
+    return overrides
+
+
 class CodeOwnersAnalyzer:
     def __init__(
         self,
@@ -524,62 +549,67 @@ def _normalize_usernames(self, users: List[str]) -> List[str]:
         """Normalize usernames to have @ prefix."""
         return [f"@{u}" if not u.startswith("@") else u for u in users]
 
+    def _extract_computed_usernames(self, data: Dict[str, Any]) -> List[str]:
+        """Extract deduplicated GitHub usernames from computed ownership data."""
+        usernames: List[str] = []
+        seen_lower: set = set()
+        if not data["owners"]:
+            return usernames
+        top_owners = [
+            o for o in data["owners"][: self.top_n_owners] if o["ownership_score"] > 0.1
+        ]
+        for owner in top_owners:
+            gh = self.get_github_username(owner["author"])
+            name = f"@{gh}" if gh else owner["author"].split("<")[1].rstrip(">")
+            if name.lower() not in seen_lower:
+                usernames.append(name)
+                seen_lower.add(name.lower())
+        return usernames
+
     def _merge_owners_with_overrides(
         self, module: str, computed_usernames: List[str]
     ) -> List[str]:
-        """Merge manual overrides with computed owners.
-
-        Override users are prepended to the list (indicating primary ownership),
-        and duplicates are removed. Override users are always included, even if
-        that exceeds top_n_owners. Only computed owners are subject to truncation.
+        """Merge manual overrides with computed owners for an exact-match path.
 
-        Note: File-level overrides are handled separately in generate_codeowners_file(),
-        since there's no computed ownership for individual files.
-
-        Args:
-            module: The module path (e.g., "flashinfer/fused_moe")
-            computed_usernames: List of GitHub usernames from git history analysis
-                (with @ prefix, e.g., ["@alice", "@bob"])
-
-        Returns:
-            Merged list of usernames with overrides first (all overrides preserved)
+        Override users are prepended; computed users fill remaining slots up to
+        top_n_owners.  The result is wider than either list alone.
         """
-        if not self.owner_overrides:
-            return computed_usernames[: self.top_n_owners]
-
-        # Skip file-level overrides (handled separately)
-        if self._is_file_path(module):
-            return computed_usernames[: self.top_n_owners]
-
-        # Get override users for this module (without @ prefix in the config)
         override_users = self.owner_overrides.get(module, [])
         if not override_users:
             return computed_usernames[: self.top_n_owners]
 
-        # Normalize override users to have @ prefix
-        override_usernames = self._normalize_usernames(override_users)
-
-        # Build merged list: overrides first, then computed (excluding duplicates)
-        # Override users are always preserved; only computed users are truncated
-        # Use case-insensitive comparison since GitHub usernames are case-insensitive
-        merged = list(override_usernames)
+        merged = self._normalize_usernames(override_users)
         merged_lower = {u.lower() for u in merged}
-        num_overrides = len(merged)
-        remaining_slots = max(0, self.top_n_owners - num_overrides)
+        remaining = max(0, self.top_n_owners - len(merged))
 
         for username in computed_usernames:
+            if remaining <= 0:
+                break
             if username.lower() not in merged_lower:
-                if remaining_slots > 0:
-                    merged.append(username)
-                    merged_lower.add(username.lower())
-                    remaining_slots -= 1
+                merged.append(username)
+                merged_lower.add(username.lower())
+                remaining -= 1
 
         return merged
 
+    def _format_codeowners_path(self, path: str) -> str:
+        """Format a path for CODEOWNERS (append / for directories)."""
+        return path if self._is_file_path(path) else f"{path}/"
+
-    def _format_codeowners_path(self, path: str) -> str:
-        """Format a path for CODEOWNERS (append / for directories)."""
-        return path if self._is_file_path(path) else f"{path}/"
+    def _format_codeowners_path(self, path: str) -> str:
+        """Format a path for CODEOWNERS (append / only for real directories)."""
+        normalized = path.rstrip("/")
+        abs_path = self.repo_path / normalized
+        if abs_path.is_dir():
+            return f"{normalized}/"
+        return normalized
-    def _format_codeowners_path(self, path: str) -> str:
-        """Format a path for CODEOWNERS (append / for directories)."""
-        return path if self._is_file_path(path) else f"{path}/"
+    def _format_codeowners_path(self, path: str) -> str:
+        """Format a path for CODEOWNERS (append / only for real directories)."""
+        normalized = path.rstrip("/")
+        abs_path = self.repo_path / normalized
+        if abs_path.is_dir():
+            return f"{normalized}/"
+        return normalized
     def generate_codeowners_file(
         self, results: Dict[str, Any], output_file: str = "CODEOWNERS"
     ) -> None:
-        """Generate a CODEOWNERS file based on analysis."""
+        """Generate a CODEOWNERS file from computed results and overrides.
+
+        All paths (computed and override-only) are emitted in alphabetical
+        order.  For exact-match paths, override owners are prepended to the
+        computed list so the final owner set is wider.  Alphabetical ordering
+        naturally puts parent dirs before children, which matters because
+        CODEOWNERS uses last-match-wins.
+        """
+        override_paths = set(self.owner_overrides) if self.owner_overrides else set()
+        all_paths = sorted(set(results) | override_paths)
+
         with open(output_file, "w") as f:
             f.write("# Code Owners File\n")
             f.write("# Generated automatically from git history analysis\n")
@@ -589,58 +619,23 @@ def generate_codeowners_file(
                 f.write("# Manual overrides applied from overrides file\n")
             f.write("\n")
 
-            # Write directory entries (computed + merged overrides)
-            for module, data in results.items():
-                # Extract GitHub usernames from computed owners
-                # Use case-insensitive deduplication since the same user may appear
-                # multiple times with different email addresses
-                computed_usernames = []
-                seen_usernames_lower = set()
-                if data["owners"]:
-                    # Take top N owners or those with ownership score > 0.1
-                    top_owners = [
-                        owner
-                        for owner in data["owners"][: self.top_n_owners]
-                        if owner["ownership_score"] > 0.1
-                    ]
-
-                    for owner in top_owners:
-                        github_username = self.get_github_username(owner["author"])
-                        if github_username:
-                            username = f"@{github_username}"
-                        else:
-                            # Fallback to email if no GitHub username found
-                            username = owner["author"].split("<")[1].rstrip(">")
-
-                        # Skip duplicates (case-insensitive)
-                        if username.lower() not in seen_usernames_lower:
-                            computed_usernames.append(username)
-                            seen_usernames_lower.add(username.lower())
-
-                # Merge with overrides
-                final_usernames = self._merge_owners_with_overrides(
-                    module, computed_usernames
-                )
-
-                if final_usernames:
-                    owners_list = " ".join(final_usernames)
-                    f.write(f"{module}/ {owners_list}\n")
-
-            # Write file-level overrides LAST (CODEOWNERS uses last-match-wins)
-            # This ensures file-specific overrides take precedence over directory patterns
-            if self.owner_overrides:
-                file_overrides = [
-                    (path, users)
-                    for path, users in self.owner_overrides.items()
-                    if self._is_file_path(path)
-                ]
-                if file_overrides:
-                    f.write(
-                        "\n# File-level overrides (must come last for precedence)\n"
+            for path in all_paths:
+                if path in results:
+                    computed = self._extract_computed_usernames(results[path])
+                    final = self._merge_owners_with_overrides(path, computed)
+                else:
+                    # Override-only path (no computed match)
+                    final = self._normalize_usernames(self.owner_overrides[path])
+
+                if final:
+                    # Sanitize against newline injection
+                    safe_path = (
+                        self._format_codeowners_path(path)
+                        .replace("\n", "")
+                        .replace("\r", "")
                     )
-                    for path, users in sorted(file_overrides):
-                        usernames = self._normalize_usernames(users)
-                        f.write(f"{path} {' '.join(usernames)}\n")
+                    safe_owners = [u.replace("\n", "").replace("\r", "") for u in final]
+                    f.write(f"{safe_path} {' '.join(safe_owners)}\n")
 
     def print_detailed_report(self, results: Dict[str, Any]) -> None:
         """Print a detailed ownership report."""
@@ -777,46 +772,15 @@ def main() -> int:
     owner_overrides = None
     if args.overrides_file:
         try:
-            with open(args.overrides_file, "r") as f:
-                owner_overrides = json.load(f)
-            if not isinstance(owner_overrides, dict):
-                print(
-                    "Error: Overrides file must contain a JSON object (dict)",
-                    file=sys.stderr,
-                )
-                return 1
-            # Validate structure: all values should be lists of strings
-            for path, users in owner_overrides.items():
-                if not isinstance(users, list):
-                    print(
-                        f"Error: Override for '{path}' must be a list of usernames",
-                        file=sys.stderr,
-                    )
-                    return 1
-                for user in users:
-                    if not isinstance(user, str):
-                        print(
-                            f"Error: Override users for '{path}' must be strings",
-                            file=sys.stderr,
-                        )
-                        return 1
-
-            # Normalize paths: strip leading "./" and trailing "/"
-            owner_overrides = {
-                path.lstrip("./").rstrip("/"): users
-                for path, users in owner_overrides.items()
-            }
+            owner_overrides = load_overrides(args.overrides_file)
         except FileNotFoundError:
             print(
                 f"Error: Overrides file not found: {args.overrides_file}",
                 file=sys.stderr,
             )
             return 1
-        except json.JSONDecodeError as e:
-            print(f"Error: Invalid JSON in overrides file: {e}", file=sys.stderr)
-            return 1
-        except Exception as e:
-            print(f"Error reading overrides file: {e}", file=sys.stderr)
+        except (json.JSONDecodeError, ValueError) as e:
+            print(f"Error in overrides file: {e}", file=sys.stderr)
             return 1
 
     try:

@@ -1,24 +1,44 @@
-{
-  "scripts/codeowner_overrides.json": ["@ci-users"],
-
-  "flashinfer/attention.py": ["@flashinfer-ai/attention-owners"],
-  "include/flashinfer/attention": ["@flashinfer-ai/attention-owners"],
-  "csrc/fmha_v2": ["@flashinfer-ai/attention-owners"],
-  "csrc/xqa": ["@flashinfer-ai/attention-owners"],
-  "tests/attention": ["@flashinfer-ai/attention-owners"],
-
-  "flashinfer/gemm": ["@flashinfer-ai/gemm-owners"],
-  "include/flashinfer/gemm": ["@flashinfer-ai/gemm-owners"],
-  "tests/gemm": ["@flashinfer-ai/gemm-owners"],
-
-  "flashinfer/fused_moe": ["@flashinfer-ai/moe-owners"],
-  "csrc/fused_moe": ["@flashinfer-ai/moe-owners"],
-  "tests/moe": ["@flashinfer-ai/moe-owners"],
-
-  "flashinfer/comm": ["@flashinfer-ai/comm-owners"],
-  "include/flashinfer/comm": ["@flashinfer-ai/comm-owners"],
-  "tests/comm": ["@flashinfer-ai/comm-owners"],
-
-  "flashinfer/norm.py": ["@flashinfer-ai/misc-op-owners"],
-  "tests/norm": ["@flashinfer-ai/misc-op-owners"]
-}
+[
+  {
+    "labels": ["op: attention"],
+    "owners": {
+      "flashinfer/attention.py": ["@flashinfer-ai/attention-owners"],
+      "include/flashinfer/attention": ["@flashinfer-ai/attention-owners"],
+      "csrc/fmha_v2": ["@flashinfer-ai/attention-owners"],
+      "csrc/xqa": ["@flashinfer-ai/attention-owners"],
+      "tests/attention": ["@flashinfer-ai/attention-owners"]
+    }
+  },
+  {
+    "labels": ["op: gemm"],
+    "owners": {
+      "flashinfer/gemm": ["@flashinfer-ai/gemm-owners"],
+      "include/flashinfer/gemm": ["@flashinfer-ai/gemm-owners"],
+      "tests/gemm": ["@flashinfer-ai/gemm-owners"]
+    }
+  },
+  {
+    "labels": ["op: moe"],
+    "owners": {
+      "flashinfer/fused_moe": ["@flashinfer-ai/moe-owners"],
+      "csrc/fused_moe": ["@flashinfer-ai/moe-owners"],
+      "tests/moe": ["@flashinfer-ai/moe-owners"]
+    }
+  },
+  {
+    "labels": ["op: comm"],
+    "owners": {
+      "flashinfer/comm": ["@flashinfer-ai/comm-owners"],
+      "include/flashinfer/comm": ["@flashinfer-ai/comm-owners"],
+      "tests/comm": ["@flashinfer-ai/comm-owners"]
+    }
+  },
+  {
+    "labels": ["op: misc"],
+    "owners": {
+      "scripts/codeowner_overrides.json": ["@ci-users"],
+      "flashinfer/norm.py": ["@flashinfer-ai/misc-op-owners"],
+      "tests/norm": ["@flashinfer-ai/misc-op-owners"]
+    }
+  }
+]