diff --git a/.agents/scripts/contributor-activity-helper.sh b/.agents/scripts/contributor-activity-helper.sh index 18e334406..1818d4a48 100755 --- a/.agents/scripts/contributor-activity-helper.sh +++ b/.agents/scripts/contributor-activity-helper.sh @@ -3,7 +3,12 @@ # # Sources activity data exclusively from immutable git commit history to prevent # manipulation. Each contributor's activity is measured by commits, active days, -# and productive time spans (first-to-last commit per active day). +# and commit type (direct vs PR merges). +# +# Commit type detection uses the committer email field: +# - committer=noreply@github.com → GitHub squash-merged a PR (automated output) +# - committer=actions@github.com → GitHub Actions (bot, filtered out) +# - committer=author's own email → direct push (human or headless CLI) # # GitHub noreply emails (NNN+login@users.noreply.github.com) are used to map # git author names to GitHub logins, normalising multiple author name variants @@ -13,18 +18,46 @@ # contributor-activity-helper.sh summary [--period day|week|month|year] # contributor-activity-helper.sh table [--format markdown|json] # contributor-activity-helper.sh user +# contributor-activity-helper.sh cross-repo-summary <path1> [path2 ...] [--period day|week|month|year] [--format markdown|json] # # Output: markdown table or JSON suitable for embedding in health issues. set -euo pipefail +# Shared Python helper functions used by the embedded Python blocks that parse +# git log output. Defined once here and expanded via ${PYTHON_HELPERS} inside each such python3 -c program. +# shellcheck disable=SC2016 +PYTHON_HELPERS=' +def email_to_login(email): + """Map git email to GitHub login. 
Normalises noreply emails.""" + if email.endswith("@users.noreply.github.com"): + local_part = email.split("@")[0] + return local_part.split("+", 1)[1] if "+" in local_part else local_part + if email in ("actions@github.com", "action@github.com"): + return "github-actions" + return email.split("@")[0] + +def is_bot(login): + """Check if a login belongs to a bot account.""" + if login == "github-actions": + return True + if login.endswith("[bot]") or login.endswith("-bot"): + return True + return False + +def is_pr_merge(committer_email): + """Detect GitHub squash-merge (committer=noreply@github.com).""" + return committer_email == "noreply@github.com" +' + ####################################### # Compute activity summary for all contributors in a repo # # Reads git log and computes per-contributor stats: -# - Total commits (by period) -# - Active days -# - Productive hours (sum of daily first-to-last commit spans) +# - Direct commits (committer = author's own email) +# - PR merges (committer = noreply@github.com, i.e. GitHub squash-merge) +# - Total commits +# - Active days (with day list in JSON for cross-repo deduplication) # - Average commits per active day # # Arguments: @@ -43,7 +76,9 @@ compute_activity() { return 1 fi - # Determine --since based on period + # Determine --since based on period. + # Values are hardcoded from the case statement below — no user input reaches + # the git command, so word splitting via SC2086 is safe here. 
local since_arg="" case "$period" in day) @@ -63,10 +98,13 @@ compute_activity() { ;; esac - # Get git log: email|ISO-date (one line per commit) + # Get git log: author_email|committer_email|ISO-date (one line per commit) + # The committer email distinguishes PR merges from direct commits: + # noreply@github.com = GitHub squash-merged a PR + # author's own email = direct push local git_data # shellcheck disable=SC2086 - git_data=$(git -C "$repo_path" log --all --format='%ae|%aI' $since_arg) || git_data="" + git_data=$(git -C "$repo_path" log --all --format='%ae|%ce|%aI' $since_arg) || git_data="" if [[ -z "$git_data" ]]; then if [[ "$format" == "json" ]]; then @@ -85,25 +123,31 @@ import json from collections import defaultdict from datetime import datetime, timezone -def email_to_login(email): - if email.endswith('@users.noreply.github.com'): - local = email.split('@')[0] - return local.split('+', 1)[1] if '+' in local else local - if email in ('actions@github.com', 'action@github.com'): - return 'github-actions' - return email.split('@')[0] +${PYTHON_HELPERS} -contributors = defaultdict(lambda: {'commits': 0, 'days': set(), 'daily_spans': defaultdict(list)}) +contributors = defaultdict(lambda: { + 'direct_commits': 0, + 'pr_merges': 0, + 'days': set(), +}) for line in sys.stdin: line = line.strip() if not line or '|' not in line: continue - email, date_str = line.split('|', 1) - login = email_to_login(email) + parts = line.split('|', 2) + if len(parts) < 3: + continue + author_email, committer_email, date_str = parts + login = email_to_login(author_email) # Skip bot accounts (GitHub Actions, Dependabot, Renovate, etc.) - if login == 'github-actions' or login.endswith('[bot]') or login.endswith('-bot'): + if is_bot(login): + continue + + # Also skip if the committer is a bot (Actions, Dependabot, etc.) 
+ committer_login = email_to_login(committer_email) + if is_bot(committer_login): continue try: @@ -112,30 +156,31 @@ for line in sys.stdin: continue day = dt.strftime('%Y-%m-%d') - contributors[login]['commits'] += 1 contributors[login]['days'].add(day) - contributors[login]['daily_spans'][day].append(dt) + + if is_pr_merge(committer_email): + contributors[login]['pr_merges'] += 1 + else: + contributors[login]['direct_commits'] += 1 results = [] -for login, data in sorted(contributors.items(), key=lambda x: -x[1]['commits']): +for login, data in sorted(contributors.items(), key=lambda x: -(x[1]['direct_commits'] + x[1]['pr_merges'])): active_days = len(data['days']) - commits = data['commits'] - - total_hours = 0.0 - for day, timestamps in data['daily_spans'].items(): - timestamps.sort() - span = (timestamps[-1] - timestamps[0]).total_seconds() / 3600 - total_hours += max(span, 0.25) - - avg_per_day = commits / active_days if active_days > 0 else 0 + total = data['direct_commits'] + data['pr_merges'] + avg_per_day = total / active_days if active_days > 0 else 0 - results.append({ + entry = { 'login': login, - 'commits': commits, + 'direct_commits': data['direct_commits'], + 'pr_merges': data['pr_merges'], + 'total_commits': total, 'active_days': active_days, - 'productive_hours': round(total_hours, 1), - 'avg_commits_per_day': round(avg_per_day, 1) - }) + 'avg_commits_per_day': round(avg_per_day, 1), + } + # JSON includes day list for cross-repo deduplication + if sys.argv[1] == 'json': + entry['active_days_list'] = sorted(data['days']) + results.append(entry) format_type = sys.argv[1] period_name = sys.argv[2] @@ -146,10 +191,10 @@ else: if not results: print(f'_No contributor activity in the last {period_name}._') else: - print('| Contributor | Commits | Active Days | Productive Hours | Avg/Day |') - print('| --- | ---: | ---: | ---: | ---: |') + print('| Contributor | Direct | PR Merges | Total | Active Days | Avg/Day |') + print('| --- | ---: | ---: | ---: | 
---: | ---: |') for r in results: - print(f'| {r[\"login\"]} | {r[\"commits\"]} | {r[\"active_days\"]} | {r[\"productive_hours\"]}h | {r[\"avg_commits_per_day\"]} |') + print(f'| {r[\"login\"]} | {r[\"direct_commits\"]} | {r[\"pr_merges\"]} | {r[\"total_commits\"]} | {r[\"active_days\"]} | {r[\"avg_commits_per_day\"]} |') " "$format" "$period" return 0 @@ -172,9 +217,9 @@ user_activity() { return 1 fi - # Get all commits (match by noreply email pattern in Python) + # Get all commits with author + committer emails local git_data - git_data=$(git -C "$repo_path" log --all --format='%ae|%aI' --since='1.year.ago') || git_data="" + git_data=$(git -C "$repo_path" log --all --format='%ae|%ce|%aI' --since='1.year.ago') || git_data="" # Target login passed via sys.argv to avoid shell injection. echo "$git_data" | python3 -c " @@ -183,13 +228,7 @@ import json from collections import defaultdict from datetime import datetime, timedelta, timezone -def email_to_login(email): - if email.endswith('@users.noreply.github.com'): - local = email.split('@')[0] - return local.split('+', 1)[1] if '+' in local else local - if email in ('actions@github.com', 'action@github.com'): - return 'github-actions' - return email.split('@')[0] +${PYTHON_HELPERS} target = sys.argv[1] now = datetime.now(timezone.utc) @@ -201,17 +240,25 @@ periods = { 'this_year': now.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0), } -counts = {p: {'commits': 0, 'days': set(), 'hours': 0.0, 'daily_spans': defaultdict(list)} for p in periods} +counts = {p: {'direct_commits': 0, 'pr_merges': 0, 'days': set()} for p in periods} for line in sys.stdin: line = line.strip() if not line or '|' not in line: continue - email, date_str = line.split('|', 1) - login = email_to_login(email) + parts = line.split('|', 2) + if len(parts) < 3: + continue + author_email, committer_email, date_str = parts + login = email_to_login(author_email) if login != target: continue + # Skip if committer is a bot (Actions, 
Dependabot, etc.) + committer_login = email_to_login(committer_email) + if is_bot(committer_login): + continue + try: dt = datetime.fromisoformat(date_str.replace('Z', '+00:00')) except ValueError: @@ -221,23 +268,21 @@ for line in sys.stdin: for period_name, start in periods.items(): start_aware = start.replace(tzinfo=timezone.utc) if start.tzinfo is None else start if dt >= start_aware: - counts[period_name]['commits'] += 1 counts[period_name]['days'].add(day) - counts[period_name]['daily_spans'][day].append(dt) + if is_pr_merge(committer_email): + counts[period_name]['pr_merges'] += 1 + else: + counts[period_name]['direct_commits'] += 1 result = {'login': target} for period_name in ('today', 'this_week', 'this_month', 'this_year'): data = counts[period_name] - total_hours = 0.0 - for day, timestamps in data['daily_spans'].items(): - timestamps.sort() - span = (timestamps[-1] - timestamps[0]).total_seconds() / 3600 - total_hours += max(span, 0.25) - + total = data['direct_commits'] + data['pr_merges'] result[period_name] = { - 'commits': data['commits'], + 'direct_commits': data['direct_commits'], + 'pr_merges': data['pr_merges'], + 'total_commits': total, 'active_days': len(data['days']), - 'productive_hours': round(total_hours, 1) } print(json.dumps(result, indent=2)) @@ -246,6 +291,134 @@ print(json.dumps(result, indent=2)) return 0 } +####################################### +# Cross-repo activity summary +# +# Aggregates activity across multiple repos without revealing repo names +# (cross-repo privacy). Uses active_days_list from JSON output to +# deduplicate days across repos (set union, not sum). 
+#
+# Arguments:
+#   $1..N - repo paths (at least one required)
+#   --period day|week|month|year (optional, default: month)
+#   --format markdown|json (optional, default: markdown)
+# Output: aggregated markdown table or JSON array to stdout, warnings to stderr; returns 1 if no repo path is given
+#######################################
+cross_repo_summary() {
+	local period="month"
+	local format="markdown"
+	local -a repo_paths=()
+
+	# Parse arguments. A trailing flag without a value keeps its default and shifts by one: "shift 2" with one argument left fails and would abort under set -e.
+	while [[ $# -gt 0 ]]; do
+		case "$1" in
+		--period)
+			period="${2:-month}"
+			shift "$(($# > 1 ? 2 : 1))"
+			;;
+		--format)
+			format="${2:-markdown}"
+			shift "$(($# > 1 ? 2 : 1))"
+			;;
+		*)
+			repo_paths+=("$1")
+			shift
+			;;
+		esac
+	done
+
+	if [[ ${#repo_paths[@]} -eq 0 ]]; then
+		echo "Error: at least one repo path required" >&2
+		return 1
+	fi
+
+	# Collect JSON (with active_days_list) from each repo, then aggregate. Failed or empty output falls back to [] so the combined document stays valid JSON.
+	local all_json="["
+	local first="true"
+	local repo_count=0 rp
+	for rp in "${repo_paths[@]}"; do
+		if [[ ! -d "$rp/.git" && ! -f "$rp/.git" ]]; then
+			echo "Warning: $rp is not a git repository, skipping" >&2
+			continue
+		fi
+		local repo_json
+		repo_json=$(compute_activity "$rp" "$period" "json") || repo_json="[]"
+		if [[ "$first" == "true" ]]; then
+			first="false"
+		else
+			all_json="${all_json},"
+		fi
+		all_json="${all_json}{\"data\":${repo_json:-[]}}"
+		repo_count=$((repo_count + 1))
+	done
+	all_json="${all_json}]"
+
+	# Aggregate across repos in Python — deduplicate active days via set union
+	printf '%s\n' "$all_json" | python3 -c "
+import sys
+import json
+
+format_type = sys.argv[1]
+period_name = sys.argv[2]
+repo_count = int(sys.argv[3])
+
+repos = json.load(sys.stdin)
+
+# Aggregate per contributor across all repos.
+# active_days uses set union to avoid double-counting days where a
+# contributor committed in multiple repos on the same calendar day.
+totals = {}
+for repo in repos:
+    for entry in repo.get('data', []):
+        login = entry['login']
+        if login not in totals:
+            totals[login] = {
+                'direct_commits': 0,
+                'pr_merges': 0,
+                'total_commits': 0,
+                'active_days_set': set(),
+                'repo_count': 0,
+            }
+        totals[login]['direct_commits'] += entry.get('direct_commits', 0)
+        totals[login]['pr_merges'] += entry.get('pr_merges', 0)
+        totals[login]['total_commits'] += entry.get('total_commits', 0)
+        # Union of day strings — deduplicates cross-repo overlaps
+        for day_str in entry.get('active_days_list', []):
+            totals[login]['active_days_set'].add(day_str)
+        if entry.get('total_commits', 0) > 0:
+            totals[login]['repo_count'] += 1
+
+results = []
+for login, data in sorted(totals.items(), key=lambda x: -x[1]['total_commits']):
+    active_days = len(data['active_days_set'])
+    avg = data['total_commits'] / active_days if active_days > 0 else 0
+    results.append({
+        'login': login,
+        'direct_commits': data['direct_commits'],
+        'pr_merges': data['pr_merges'],
+        'total_commits': data['total_commits'],
+        'active_days': active_days,
+        'repos_active': data['repo_count'],
+        'avg_commits_per_day': round(avg, 1),
+    })
+
+if format_type == 'json':
+    print(json.dumps(results, indent=2))
+else:
+    if not results:
+        print(f'_No cross-repo activity in the last {period_name}._')
+    else:
+        print(f'_Across {repo_count} managed repos:_')
+        print()
+        print('| Contributor | Direct | PR Merges | Total | Active Days | Repos | Avg/Day |')
+        print('| --- | ---: | ---: | ---: | ---: | ---: | ---: |')
+        for r in results:
+            print(f'| {r[\"login\"]} | {r[\"direct_commits\"]} | {r[\"pr_merges\"]} | {r[\"total_commits\"]} | {r[\"active_days\"]} | {r[\"repos_active\"]} | {r[\"avg_commits_per_day\"]} |')
+" "$format" "$period" "$repo_count"
+
+	return 0
+}
+
 #######################################
 # Main
 #######################################
@@ -285,6 +458,9 @@ main() {
 		fi
 		user_activity "$repo_path" "$login"
 		;;
+	cross-repo-summary)
+		cross_repo_summary "$@"
+ ;; help | *) echo "Usage: $0 [options]" echo "" @@ -292,9 +468,14 @@ main() { echo " summary [--period day|week|month|year] [--format markdown|json]" echo " table [--period day|week|month|year] [--format markdown|json]" echo " user " + echo " cross-repo-summary [path2 ...] [--period month] [--format markdown]" echo "" echo "Computes contributor activity from immutable git commit history." echo "GitHub noreply emails are used to normalise author names to logins." + echo "" + echo "Commit types:" + echo " Direct - committer is the author (push, CLI commit)" + echo " PR Merge - committer is noreply@github.com (GitHub squash-merge)" return 0 ;; esac diff --git a/.agents/scripts/pulse-wrapper.sh b/.agents/scripts/pulse-wrapper.sh index 5c5f5279b..b859d6265 100755 --- a/.agents/scripts/pulse-wrapper.sh +++ b/.agents/scripts/pulse-wrapper.sh @@ -1650,6 +1650,7 @@ _get_runner_role() { _update_health_issue_for_repo() { local repo_slug="$1" local repo_path="$2" + local cross_repo_md="${3:-}" [[ -z "$repo_slug" ]] && return 0 @@ -1963,7 +1964,9 @@ ${worker_table}" session_warning=" **WARNING: exceeds threshold of ${SESSION_COUNT_WARN}**" fi - # --- Contributor activity from git history --- + # --- Contributor activity from git history (per-repo only) --- + # Cross-repo totals are pre-computed once in update_health_issues() and + # passed via $3 to avoid redundant git log walks (N repos × N repos). local activity_md="" local activity_helper="${HOME}/.aidevops/agents/scripts/contributor-activity-helper.sh" if [[ -x "$activity_helper" ]]; then @@ -2004,6 +2007,10 @@ ${workers_md} ${activity_md} +### Cross-Repo Totals (last 30 days) + +${cross_repo_md:-_Single repo or cross-repo data unavailable._} + ### System Resources | Metric | Value | @@ -2151,10 +2158,28 @@ update_health_issues() { return 0 fi + # Pre-compute cross-repo activity summary ONCE for all health issues. + # This avoids N×N git log walks (one cross-repo scan per repo dashboard). 
+ local cross_repo_md="" + local activity_helper="${HOME}/.aidevops/agents/scripts/contributor-activity-helper.sh" + if [[ -x "$activity_helper" ]]; then + local all_repo_paths + all_repo_paths=$(jq -r '.initialized_repos[] | select(.pulse == true and (.local_only // false) == false) | .path' "$repos_json" 2>/dev/null || echo "") + if [[ -n "$all_repo_paths" ]]; then + local -a cross_args=() + while IFS= read -r rp; do + [[ -n "$rp" ]] && cross_args+=("$rp") + done <<<"$all_repo_paths" + if [[ ${#cross_args[@]} -gt 1 ]]; then + cross_repo_md=$(bash "$activity_helper" cross-repo-summary "${cross_args[@]}" --period month --format markdown || echo "_Cross-repo data unavailable._") + fi + fi + fi + local updated=0 while IFS='|' read -r slug path; do [[ -z "$slug" ]] && continue - _update_health_issue_for_repo "$slug" "$path" || true + _update_health_issue_for_repo "$slug" "$path" "$cross_repo_md" || true updated=$((updated + 1)) done <<<"$repo_entries"