From b3686a052799f379b9e033c5f397c2b0f46529b1 Mon Sep 17 00:00:00 2001
From: Alex Yang <aleyang@nvidia.com>
Date: Tue, 3 Mar 2026 18:43:53 -0800
Subject: [PATCH 1/3] feat: restructure codeowner_overrides.json for PR
 auto-labeling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Restructure codeowner_overrides.json from a flat dict to a grouped array
where each group has `labels` (for PR auto-labeling) and `owners` (path
to owner mappings). All existing path→owner mappings are preserved.

Update codeowner_analyzer.py to parse the new format via a standalone
`load_overrides()` function, and simplify CODEOWNERS generation to a
single alphabetically-sorted pass over the union of computed and override
paths. Override owners are prepended on exact matches for a wider owner
list; override-only paths (including directories) are now emitted too.

Add pr-auto-label.yml workflow that reads the grouped overrides, matches
changed files by path prefix, and additively applies labels. Uses
pull_request_target for safe write access on external PRs. Skips PRs
older than 21 days.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/pr-auto-label.yml |  64 +++++++++
 scripts/codeowner_analyzer.py       | 201 +++++++++++-----------------
 scripts/codeowner_overrides.json    |  68 ++++++----
 3 files changed, 187 insertions(+), 146 deletions(-)
 create mode 100644 .github/workflows/pr-auto-label.yml

diff --git a/.github/workflows/pr-auto-label.yml b/.github/workflows/pr-auto-label.yml
new file mode 100644
index 0000000000..8b1587a7bd
--- /dev/null
+++ b/.github/workflows/pr-auto-label.yml
@@ -0,0 +1,64 @@
+name: PR Auto-Label
+
+on:
+  pull_request_target:
+    types: [opened, synchronize]
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  label:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Skip PRs older than 21 days
+        env:
+          PR_CREATED: ${{ github.event.pull_request.created_at }}
+        run: |
+          created=$(date -d "$PR_CREATED" +%s)
+          cutoff=$(date -d '21 days ago' +%s)
+          if [ "$created" -lt "$cutoff" ]; then
+            echo "PR older than 21 days, skipping"
+            exit 1
+          fi
+
+      - name: Sparse checkout overrides file from base branch
+        uses: actions/checkout@v4
+        with:
+          sparse-checkout: scripts/codeowner_overrides.json
+          sparse-checkout-cone-mode: false
+
+      - name: Apply labels based on changed files
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: |
+          # Fetch changed files in the PR
+          changed_files=$(gh api "repos/${{ github.repository }}/pulls/${PR_NUMBER}/files" \
+            --paginate --jq '.[].filename')
+
+          # Read override groups and apply matching labels
+          jq -c '.[]' scripts/codeowner_overrides.json | while read -r group; do
+            labels=$(echo "$group" | jq -r '.labels[]')
+            prefixes=$(echo "$group" | jq -r '.owners | keys[]')
+
+            match=false
+            for prefix in $prefixes; do
+              for file in $changed_files; do
+                case "$file" in
+                  "$prefix"*) match=true; break ;;
+                esac
+              done
+              if $match; then break; fi
+            done
+
+            if $match; then
+              for label in $labels; do
+                echo "Adding label: $label"
+                gh pr edit "$PR_NUMBER" \
+                  --repo "${{ github.repository }}" \
+                  --add-label "$label" || true
+              done
+            fi
+          done
diff --git a/scripts/codeowner_analyzer.py b/scripts/codeowner_analyzer.py
index acae27f8b2..6a31562120 100644
--- a/scripts/codeowner_analyzer.py
+++ b/scripts/codeowner_analyzer.py
@@ -18,6 +18,31 @@
 import re
 
 
+def load_overrides(filename: str) -> Dict[str, List[str]]:
+    """Load codeowner overrides from a grouped JSON array file.
+
+    Returns a flat dict mapping path prefixes to lists of owners.
+    """
+    with open(filename, "r") as f:
+        raw = json.load(f)
+
+    if not isinstance(raw, list):
+        raise ValueError("Overrides file must contain a JSON array")
+
+    overrides: Dict[str, List[str]] = {}
+    for group in raw:
+        if not isinstance(group, dict) or "owners" not in group:
+            raise ValueError("Each group must be an object with an 'owners' key")
+        for path, users in group["owners"].items():
+            if not isinstance(users, list) or not all(
+                isinstance(u, str) for u in users
+            ):
+                raise ValueError(f"Override for '{path}' must be a list of strings")
+            overrides[path.lstrip("./").rstrip("/")] = users
+
+    return overrides
+
+
 class CodeOwnersAnalyzer:
     def __init__(
         self,
@@ -524,62 +549,67 @@ def _normalize_usernames(self, users: List[str]) -> List[str]:
         """Normalize usernames to have @ prefix."""
         return [f"@{u}" if not u.startswith("@") else u for u in users]
 
+    def _extract_computed_usernames(self, data: Dict[str, Any]) -> List[str]:
+        """Extract deduplicated GitHub usernames from computed ownership data."""
+        usernames: List[str] = []
+        seen_lower: set = set()
+        if not data["owners"]:
+            return usernames
+        top_owners = [
+            o for o in data["owners"][: self.top_n_owners] if o["ownership_score"] > 0.1
+        ]
+        for owner in top_owners:
+            gh = self.get_github_username(owner["author"])
+            name = f"@{gh}" if gh else owner["author"].split("<")[1].rstrip(">")
+            if name.lower() not in seen_lower:
+                usernames.append(name)
+                seen_lower.add(name.lower())
+        return usernames
+
     def _merge_owners_with_overrides(
         self, module: str, computed_usernames: List[str]
     ) -> List[str]:
-        """Merge manual overrides with computed owners.
-
-        Override users are prepended to the list (indicating primary ownership),
-        and duplicates are removed. Override users are always included, even if
-        that exceeds top_n_owners. Only computed owners are subject to truncation.
+        """Merge manual overrides with computed owners for an exact-match path.
 
-        Note: File-level overrides are handled separately in generate_codeowners_file(),
-        since there's no computed ownership for individual files.
-
-        Args:
-            module: The module path (e.g., "flashinfer/fused_moe")
-            computed_usernames: List of GitHub usernames from git history analysis
-                (with @ prefix, e.g., ["@alice", "@bob"])
-
-        Returns:
-            Merged list of usernames with overrides first (all overrides preserved)
+        Override users are prepended; computed users fill remaining slots up to
+        top_n_owners.  The result is wider than either list alone.
         """
-        if not self.owner_overrides:
-            return computed_usernames[: self.top_n_owners]
-
-        # Skip file-level overrides (handled separately)
-        if self._is_file_path(module):
-            return computed_usernames[: self.top_n_owners]
-
-        # Get override users for this module (without @ prefix in the config)
         override_users = self.owner_overrides.get(module, [])
         if not override_users:
             return computed_usernames[: self.top_n_owners]
 
-        # Normalize override users to have @ prefix
-        override_usernames = self._normalize_usernames(override_users)
-
-        # Build merged list: overrides first, then computed (excluding duplicates)
-        # Override users are always preserved; only computed users are truncated
-        # Use case-insensitive comparison since GitHub usernames are case-insensitive
-        merged = list(override_usernames)
+        merged = self._normalize_usernames(override_users)
         merged_lower = {u.lower() for u in merged}
-        num_overrides = len(merged)
-        remaining_slots = max(0, self.top_n_owners - num_overrides)
+        remaining = max(0, self.top_n_owners - len(merged))
 
         for username in computed_usernames:
+            if remaining <= 0:
+                break
             if username.lower() not in merged_lower:
-                if remaining_slots > 0:
-                    merged.append(username)
-                    merged_lower.add(username.lower())
-                    remaining_slots -= 1
+                merged.append(username)
+                merged_lower.add(username.lower())
+                remaining -= 1
 
         return merged
 
+    def _format_codeowners_path(self, path: str) -> str:
+        """Format a path for CODEOWNERS (append / for directories)."""
+        return path if self._is_file_path(path) else f"{path}/"
+
     def generate_codeowners_file(
         self, results: Dict[str, Any], output_file: str = "CODEOWNERS"
     ) -> None:
-        """Generate a CODEOWNERS file based on analysis."""
+        """Generate a CODEOWNERS file from computed results and overrides.
+
+        All paths (computed and override-only) are emitted in alphabetical
+        order.  For exact-match paths, override owners are prepended to the
+        computed list so the final owner set is wider.  Alphabetical ordering
+        naturally puts parent dirs before children, which matters because
+        CODEOWNERS uses last-match-wins.
+        """
+        override_paths = set(self.owner_overrides) if self.owner_overrides else set()
+        all_paths = sorted(set(results) | override_paths)
+
         with open(output_file, "w") as f:
             f.write("# Code Owners File\n")
             f.write("# Generated automatically from git history analysis\n")
@@ -589,58 +619,16 @@ def generate_codeowners_file(
                 f.write("# Manual overrides applied from overrides file\n")
             f.write("\n")
 
-            # Write directory entries (computed + merged overrides)
-            for module, data in results.items():
-                # Extract GitHub usernames from computed owners
-                # Use case-insensitive deduplication since the same user may appear
-                # multiple times with different email addresses
-                computed_usernames = []
-                seen_usernames_lower = set()
-                if data["owners"]:
-                    # Take top N owners or those with ownership score > 0.1
-                    top_owners = [
-                        owner
-                        for owner in data["owners"][: self.top_n_owners]
-                        if owner["ownership_score"] > 0.1
-                    ]
-
-                    for owner in top_owners:
-                        github_username = self.get_github_username(owner["author"])
-                        if github_username:
-                            username = f"@{github_username}"
-                        else:
-                            # Fallback to email if no GitHub username found
-                            username = owner["author"].split("<")[1].rstrip(">")
-
-                        # Skip duplicates (case-insensitive)
-                        if username.lower() not in seen_usernames_lower:
-                            computed_usernames.append(username)
-                            seen_usernames_lower.add(username.lower())
-
-                # Merge with overrides
-                final_usernames = self._merge_owners_with_overrides(
-                    module, computed_usernames
-                )
-
-                if final_usernames:
-                    owners_list = " ".join(final_usernames)
-                    f.write(f"{module}/ {owners_list}\n")
+            for path in all_paths:
+                if path in results:
+                    computed = self._extract_computed_usernames(results[path])
+                    final = self._merge_owners_with_overrides(path, computed)
+                else:
+                    # Override-only path (no computed match)
+                    final = self._normalize_usernames(self.owner_overrides[path])
 
-            # Write file-level overrides LAST (CODEOWNERS uses last-match-wins)
-            # This ensures file-specific overrides take precedence over directory patterns
-            if self.owner_overrides:
-                file_overrides = [
-                    (path, users)
-                    for path, users in self.owner_overrides.items()
-                    if self._is_file_path(path)
-                ]
-                if file_overrides:
-                    f.write(
-                        "\n# File-level overrides (must come last for precedence)\n"
-                    )
-                    for path, users in sorted(file_overrides):
-                        usernames = self._normalize_usernames(users)
-                        f.write(f"{path} {' '.join(usernames)}\n")
+                if final:
+                    f.write(f"{self._format_codeowners_path(path)} {' '.join(final)}\n")
 
     def print_detailed_report(self, results: Dict[str, Any]) -> None:
         """Print a detailed ownership report."""
@@ -777,46 +765,15 @@ def main() -> int:
     owner_overrides = None
     if args.overrides_file:
         try:
-            with open(args.overrides_file, "r") as f:
-                owner_overrides = json.load(f)
-            if not isinstance(owner_overrides, dict):
-                print(
-                    "Error: Overrides file must contain a JSON object (dict)",
-                    file=sys.stderr,
-                )
-                return 1
-            # Validate structure: all values should be lists of strings
-            for path, users in owner_overrides.items():
-                if not isinstance(users, list):
-                    print(
-                        f"Error: Override for '{path}' must be a list of usernames",
-                        file=sys.stderr,
-                    )
-                    return 1
-                for user in users:
-                    if not isinstance(user, str):
-                        print(
-                            f"Error: Override users for '{path}' must be strings",
-                            file=sys.stderr,
-                        )
-                        return 1
-
-            # Normalize paths: strip leading "./" and trailing "/"
-            owner_overrides = {
-                path.lstrip("./").rstrip("/"): users
-                for path, users in owner_overrides.items()
-            }
+            owner_overrides = load_overrides(args.overrides_file)
         except FileNotFoundError:
             print(
                 f"Error: Overrides file not found: {args.overrides_file}",
                 file=sys.stderr,
             )
             return 1
-        except json.JSONDecodeError as e:
-            print(f"Error: Invalid JSON in overrides file: {e}", file=sys.stderr)
-            return 1
-        except Exception as e:
-            print(f"Error reading overrides file: {e}", file=sys.stderr)
+        except (json.JSONDecodeError, ValueError) as e:
+            print(f"Error in overrides file: {e}", file=sys.stderr)
             return 1
 
     try:
diff --git a/scripts/codeowner_overrides.json b/scripts/codeowner_overrides.json
index 9816d5e4dd..9493c4b17d 100644
--- a/scripts/codeowner_overrides.json
+++ b/scripts/codeowner_overrides.json
@@ -1,24 +1,44 @@
-{
-  "scripts/codeowner_overrides.json": ["@ci-users"],
-
-  "flashinfer/attention.py": ["@flashinfer-ai/attention-owners"],
-  "include/flashinfer/attention": ["@flashinfer-ai/attention-owners"],
-  "csrc/fmha_v2": ["@flashinfer-ai/attention-owners"],
-  "csrc/xqa": ["@flashinfer-ai/attention-owners"],
-  "tests/attention": ["@flashinfer-ai/attention-owners"],
-
-  "flashinfer/gemm": ["@flashinfer-ai/gemm-owners"],
-  "include/flashinfer/gemm": ["@flashinfer-ai/gemm-owners"],
-  "tests/gemm": ["@flashinfer-ai/gemm-owners"],
-
-  "flashinfer/fused_moe": ["@flashinfer-ai/moe-owners"],
-  "csrc/fused_moe": ["@flashinfer-ai/moe-owners"],
-  "tests/moe": ["@flashinfer-ai/moe-owners"],
-
-  "flashinfer/comm": ["@flashinfer-ai/comm-owners"],
-  "include/flashinfer/comm": ["@flashinfer-ai/comm-owners"],
-  "tests/comm": ["@flashinfer-ai/comm-owners"],
-
-  "flashinfer/norm.py": ["@flashinfer-ai/misc-op-owners"],
-  "tests/norm": ["@flashinfer-ai/misc-op-owners"]
-}
+[
+  {
+    "labels": ["op: attention"],
+    "owners": {
+      "flashinfer/attention.py": ["@flashinfer-ai/attention-owners"],
+      "include/flashinfer/attention": ["@flashinfer-ai/attention-owners"],
+      "csrc/fmha_v2": ["@flashinfer-ai/attention-owners"],
+      "csrc/xqa": ["@flashinfer-ai/attention-owners"],
+      "tests/attention": ["@flashinfer-ai/attention-owners"]
+    }
+  },
+  {
+    "labels": ["op: gemm"],
+    "owners": {
+      "flashinfer/gemm": ["@flashinfer-ai/gemm-owners"],
+      "include/flashinfer/gemm": ["@flashinfer-ai/gemm-owners"],
+      "tests/gemm": ["@flashinfer-ai/gemm-owners"]
+    }
+  },
+  {
+    "labels": ["op: moe"],
+    "owners": {
+      "flashinfer/fused_moe": ["@flashinfer-ai/moe-owners"],
+      "csrc/fused_moe": ["@flashinfer-ai/moe-owners"],
+      "tests/moe": ["@flashinfer-ai/moe-owners"]
+    }
+  },
+  {
+    "labels": ["op: comm"],
+    "owners": {
+      "flashinfer/comm": ["@flashinfer-ai/comm-owners"],
+      "include/flashinfer/comm": ["@flashinfer-ai/comm-owners"],
+      "tests/comm": ["@flashinfer-ai/comm-owners"]
+    }
+  },
+  {
+    "labels": ["op: misc"],
+    "owners": {
+      "scripts/codeowner_overrides.json": ["@ci-users"],
+      "flashinfer/norm.py": ["@flashinfer-ai/misc-op-owners"],
+      "tests/norm": ["@flashinfer-ai/misc-op-owners"]
+    }
+  }
+]

From b22fa5fc649b3dc160cb40d80fcc479ea8c725e5 Mon Sep 17 00:00:00 2001
From: Alex Yang <aleyang@nvidia.com>
Date: Tue, 3 Mar 2026 19:10:06 -0800
Subject: [PATCH 2/3] feat: add --labels flag to filter tests by PR labels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add --labels option to test_utils.sh parse_args() accepting
comma-separated PR labels (e.g., "op: attention,op: gemm").

Implement gh_label_filter() which reads codeowner_overrides.json to
determine which test prefixes are categorized under which labels.
Four-step logic: (1) no labels → run all, (2) label match → run,
(3) categorized but no match → skip, (4) uncategorized → always run.

Wire the filter into task_run_unit_tests.sh find_test_files() so
discovered test files are filtered before execution.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 scripts/task_run_unit_tests.sh |  8 +++-
 scripts/test_utils.sh          | 85 +++++++++++++++++++++++++++++++++-
 2 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/scripts/task_run_unit_tests.sh b/scripts/task_run_unit_tests.sh
index 303bb00d3a..47cbe24860 100755
--- a/scripts/task_run_unit_tests.sh
+++ b/scripts/task_run_unit_tests.sh
@@ -50,7 +50,13 @@ find_test_files() {
         done
 
         if [ "$exclude_file" = false ]; then
-            TEST_FILES="$TEST_FILES $test_file"
+            # Apply label-based filtering if --labels was provided
+            local skip_reason
+            if skip_reason=$(gh_label_filter "$test_file"); then
+                TEST_FILES="$TEST_FILES $test_file"
+            else
+                echo "  Skipping $test_file: $skip_reason"
+            fi
         fi
     done
 
diff --git a/scripts/test_utils.sh b/scripts/test_utils.sh
index 68106e8c98..6308f31c35 100755
--- a/scripts/test_utils.sh
+++ b/scripts/test_utils.sh
@@ -36,8 +36,9 @@ EXIT_CODE=0
 parse_args() {
     DRY_RUN=false
     SANITY_TEST=false
-    for arg in "$@"; do
-        case $arg in
+    LABELS=""
+    while [ $# -gt 0 ]; do
+        case $1 in
             --dry-run)
                 DRY_RUN=true
                 ;;
@@ -50,10 +51,90 @@ parse_args() {
                     SANITY_TEST=true
                 fi
                 ;;
+            --labels)
+                shift
+                LABELS="$1"
+                ;;
         esac
+        shift
+    done
+}
+
+# Label filter state (populated once by _init_label_filter)
+_LABEL_FILTER_INITIALIZED=false
+_CATEGORIZED_PREFIXES=""  # all tests/* prefixes in the JSON
+_INCLUDED_PREFIXES=""     # tests/* prefixes whose group labels match LABELS
+
+_init_label_filter() {
+    if [ "$_LABEL_FILTER_INITIALIZED" = true ]; then
+        return
+    fi
+    _LABEL_FILTER_INITIALIZED=true
+
+    local overrides_file="${SCRIPT_DIR}/codeowner_overrides.json"
+    if [ ! -f "$overrides_file" ]; then
+        echo "Warning: $overrides_file not found, label filter disabled" >&2
+        LABELS=""
+        return
+    fi
+
+    # All test prefixes mentioned in the JSON
+    _CATEGORIZED_PREFIXES=$(jq -r '.[].owners | keys[] | select(startswith("tests/"))' "$overrides_file")
+
+    # Test prefixes whose group labels intersect with the requested labels
+    IFS=',' read -ra _label_array <<< "$LABELS"
+    for _req_label in "${_label_array[@]}"; do
+        _req_label=$(echo "$_req_label" | xargs)  # trim whitespace
+        local matches
+        matches=$(jq -r --arg label "$_req_label" \
+            '.[] | select(.labels[] == $label) | .owners | keys[] | select(startswith("tests/"))' \
+            "$overrides_file")
+        if [ -n "$matches" ]; then
+            _INCLUDED_PREFIXES="${_INCLUDED_PREFIXES}${_INCLUDED_PREFIXES:+
+}${matches}"
+        fi
     done
 }
 
+# Filter a test file by PR labels.
+# Returns 0 (include) or 1 (skip). On skip, prints a brief reason to stdout.
+#
+# Four-step logic:
+#   1. No labels specified        → include (run everything)
+#   2. File matches included label → include (label hit)
+#   3. File is categorized but not included → skip (label mismatch)
+#   4. File is uncategorized       → include (always run)
+gh_label_filter() {
+    local test_file="$1"
+
+    # Step 1: no labels specified — include everything
+    if [ -z "$LABELS" ]; then
+        return 0
+    fi
+
+    _init_label_filter
+
+    # Step 2: file matches a prefix whose group label is in LABELS — include
+    while IFS= read -r prefix; do
+        [ -z "$prefix" ] && continue
+        if [[ "$test_file" == "$prefix"* ]]; then
+            return 0
+        fi
+    done <<< "$_INCLUDED_PREFIXES"
+
+    # Step 3: file is categorized in the JSON but no label matched — skip
+    while IFS= read -r prefix; do
+        [ -z "$prefix" ] && continue
+        if [[ "$test_file" == "$prefix"* ]]; then
+            echo "no matching label (have: $LABELS)"
+            return 1
+        fi
+    done <<< "$_CATEGORIZED_PREFIXES"
+
+    # Step 4: file not mentioned in any group — uncategorized, always run
+    return 0
+}
+
 # Print test mode banner
 print_test_mode_banner() {
     if [ "$DRY_RUN" = "true" ]; then

From 1489253ba96be172dbc5ab8e77137fe11d5a84ca Mon Sep 17 00:00:00 2001
From: Alex Yang <aleyang@nvidia.com>
Date: Tue, 3 Mar 2026 19:13:45 -0800
Subject: [PATCH 3/3] fix: address code review feedback on PR #2683

- Sanitize paths and usernames against newline injection in CODEOWNERS
  output (high severity)
- Validate group['owners'] is a dict, not just present (medium)
- Use readarray + quoted array expansion in pr-auto-label.yml to handle
  labels with spaces (e.g., "op: attention")

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/pr-auto-label.yml |  8 ++++----
 scripts/codeowner_analyzer.py       | 13 ++++++++++---
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/pr-auto-label.yml b/.github/workflows/pr-auto-label.yml
index 8b1587a7bd..907ab392fb 100644
--- a/.github/workflows/pr-auto-label.yml
+++ b/.github/workflows/pr-auto-label.yml
@@ -40,11 +40,11 @@ jobs:
 
           # Read override groups and apply matching labels
           jq -c '.[]' scripts/codeowner_overrides.json | while read -r group; do
-            labels=$(echo "$group" | jq -r '.labels[]')
-            prefixes=$(echo "$group" | jq -r '.owners | keys[]')
+            readarray -t labels < <(echo "$group" | jq -r '.labels[]')
+            readarray -t prefixes < <(echo "$group" | jq -r '.owners | keys[]')
 
             match=false
-            for prefix in $prefixes; do
+            for prefix in "${prefixes[@]}"; do
               for file in $changed_files; do
                 case "$file" in
                   "$prefix"*) match=true; break ;;
@@ -54,7 +54,7 @@ jobs:
             done
 
             if $match; then
-              for label in $labels; do
+              for label in "${labels[@]}"; do
                 echo "Adding label: $label"
                 gh pr edit "$PR_NUMBER" \
                   --repo "${{ github.repository }}" \
diff --git a/scripts/codeowner_analyzer.py b/scripts/codeowner_analyzer.py
index 6a31562120..935d9bffaa 100644
--- a/scripts/codeowner_analyzer.py
+++ b/scripts/codeowner_analyzer.py
@@ -31,8 +31,8 @@ def load_overrides(filename: str) -> Dict[str, List[str]]:
 
     overrides: Dict[str, List[str]] = {}
     for group in raw:
-        if not isinstance(group, dict) or "owners" not in group:
-            raise ValueError("Each group must be an object with an 'owners' key")
+        if not isinstance(group, dict) or not isinstance(group.get("owners"), dict):
+            raise ValueError("Each group must be an object with an 'owners' dict")
         for path, users in group["owners"].items():
             if not isinstance(users, list) or not all(
                 isinstance(u, str) for u in users
@@ -628,7 +628,14 @@ def generate_codeowners_file(
                     final = self._normalize_usernames(self.owner_overrides[path])
 
                 if final:
-                    f.write(f"{self._format_codeowners_path(path)} {' '.join(final)}\n")
+                    # Sanitize against newline injection
+                    safe_path = (
+                        self._format_codeowners_path(path)
+                        .replace("\n", "")
+                        .replace("\r", "")
+                    )
+                    safe_owners = [u.replace("\n", "").replace("\r", "") for u in final]
+                    f.write(f"{safe_path} {' '.join(safe_owners)}\n")
 
     def print_detailed_report(self, results: Dict[str, Any]) -> None:
         """Print a detailed ownership report."""