diff --git a/.claude/skills/analyze-quarterly-metrics/SKILL.md b/.claude/skills/analyze-quarterly-metrics/SKILL.md new file mode 100644 index 0000000..10d9cc6 --- /dev/null +++ b/.claude/skills/analyze-quarterly-metrics/SKILL.md @@ -0,0 +1,175 @@ +--- +name: analyze-quarterly-metrics +description: This skill should be used when the user asks to "analyze quarterly metrics", "analyze the quarter", "generate quarterly report", "quarterly analysis", mentions a specific quarter like "2026-Q2", or discusses trends, risks, and growth opportunities for Linux System Roles metrics. +--- + +# Quarterly Metrics Analysis Skill + +This skill analyzes quarterly metrics data and generates a FOCUSED report (~400-500 words) with key metrics, highlights, top downloaded roles, concerns, and actionable recommendations. + +## What to do + +1. **Determine if the quarter is complete or in-progress:** + - Get today's date and compare to the quarter being analyzed + - If analyzing current or future quarter: Add a disclaimer that data is PARTIAL/INCOMPLETE + - Adjust your interpretation: low numbers may just mean "not much time has passed yet", not a crisis + - For partial quarters: focus on trends and rates rather than absolute numbers + +2. **Load the raw data** for the specified quarter: + - Read the summary CSV files: `data/github_prs_summary.csv`, `data/github_issues_summary.csv`, `data/galaxy_legacy_summary.csv`, `data/galaxy_collections_summary.csv` + - Extract data for the specified quarter and the previous quarter for comparison + - If `data/{{quarter}}/galaxy_legacy.csv` exists, read it for top downloaded roles and growth analysis + +3. 
**Calculate key metrics** from the raw data: + - **PR Merge Rate**: PRs Merged / (PRs Created - PRs Open) × 100 (excludes PRs still under review) + - **External Acceptance Rate**: External PRs Merged / (External PRs Created - External PRs Open) × 100 + - **External Contribution %**: External PRs Created / PRs Created × 100 + - **Issue Resolution Rate**: Issues Closed / Issues Created × 100 + - **QoQ Growth rates**: Compare current quarter to previous quarter + - **Fastest growing roles**: If per-role data exists, identify top gainers by comparing to previous quarter + +4. **Analyze the data** and generate a FOCUSED report with these sections: + + ### Executive Summary (2-3 sentences) + - High-level overview: what's the overall health? One key win, one key concern. + - Note if data is partial (for incomplete quarters) + + ### Key Metrics (Table format) + - Comparison table: Current vs Previous Quarter + - PRs Created, Merged, Merge Rate + - External PRs, External Acceptance Rate, External % of total + - Issues Created, Closed, Resolution Rate + - Galaxy Legacy Downloads (QoQ change) + - Galaxy Collections Downloads (QoQ change) + + ### Highlights (2-3 bullets) + - Notable achievements and positive changes + - Metrics that improved + - Balance the concerns with wins + + ### Top Downloaded Roles (Top 5 list) + - List the 5 most downloaded roles with cumulative totals + - If per-role data exists for current and previous quarter, also show fastest growing/declining roles + + ### Top Concerns (3-4 bullets) + - Most critical issues requiring attention + - Use specific numbers and percentages + - Only urgent/high-impact issues + + ### Recommendations (3-4 bullets) + - Most important actionable items + - Be specific and brief + - Focus on high-impact actions + + **TARGET LENGTH**: ~400-500 words. Scannable but informative. + +5. **Be specific with numbers**: Always cite actual metrics, percentages, and comparisons. 
Don't use vague language like "significant" without quantifying it. + +6. **Be smart about partial quarters**: + - If the quarter is incomplete, DO NOT flag low absolute numbers as risks + - Focus on rates (merge rate, acceptance rate) rather than volumes for partial data + - Only flag things as concerns if they represent actual problems, not just "we're only 2 weeks into the quarter" + - Make it clear in the Executive Summary if data is partial + +7. **Validate the report for accuracy**: + - Re-read all numeric claims in your draft + - For each number: verify it matches the correct CSV column/row + - For each comparison: verify the direction (higher/lower) matches the math + - Check for contradictions: "up from X, but below X" is impossible + - If you find errors, fix them before saving + +8. **Automatically save the report**: + - Save to `reports/{{quarter}}-analysis.md` + - Create the reports directory if it doesn't exist + - **Safe to overwrite**: If the report file already exists, overwrite it (reports are tracked in git, so previous versions are preserved) + - Show the full report to the user and tell them where the file was saved + +## Guidelines + +- **Balanced and scannable**: Target ~400-500 words, readable in 2-3 minutes +- **Detect partial quarters**: Note if data is incomplete in the Executive Summary +- **Compare to previous quarter**: QoQ changes only, skip historical deep-dives +- **Balance concerns with wins**: Include both Highlights and Top Concerns sections +- **Show what matters**: Top downloaded roles help prioritize work +- **Only critical issues**: If it's not urgent or high-impact, skip it from concerns +- **Be actionable**: Every recommendation must be specific and implementable +- **Don't cry wolf on partial data**: For incomplete quarters, only flag true risks (bad rates, declining trends), not low volumes +- **Use numbers**: Always cite specific metrics, no vague language + +### Data Accuracy and Validation + +**CRITICAL: Verify all 
numbers before making claims** + +1. **Use correct data sources**: + - When discussing a specific collection (e.g., fedora.linux_system_roles), use that collection's column, NOT the Total Downloads column + - When discussing totals, clearly state "total across all collections" + - Double-check: Does the number in your sentence match the CSV cell you're referencing? + +2. **Verify comparison direction**: + - If A > B: use "higher than", "above", "exceeds", "increased from" + - If A < B: use "lower than", "below", "decreased from", "down from" + - If A ≈ B: use "similar to", "comparable to", "roughly equal to" + - **NEVER** say "A is well below B" when A > B or vice versa + +3. **Validate calculations**: + - QoQ growth = ((Current - Previous) / Previous) × 100 + - If growth is positive, use "increased" or "up"; if negative, use "decreased" or "down" + - Check sign: positive growth means increase, negative growth means decrease + +4. **Before finalizing the report**: + - Re-read each numeric claim + - Verify the number matches the data source (correct CSV column/row) + - Verify the comparison direction matches the math (higher/lower/equal) + - Check for internal contradictions (e.g., "up from X, but well below X") + +## Example invocations + +**Via skill invocation:** +User types: `/analyze-quarterly-metrics 2026-Q2` + +**Via natural language:** +- "Analyze the quarterly metrics for 2026-Q2" +- "Generate a quarterly report for Q2 2026" +- "What do the metrics show for this quarter?" + +You should: +1. Extract the quarter from the user's request or args (format: YYYY-QN, e.g., 2026-Q2) +2. Determine if the quarter is complete or in-progress based on today's date +3. Read the raw metrics data from CSV files for that quarter and previous quarter +4. Read per-role Galaxy data if available for top downloaded roles analysis +5. Calculate derived metrics (merge rates, growth rates, etc.) +6. 
Generate a focused report (~400-500 words): Executive Summary, Key Metrics, Highlights, Top Downloaded Roles, Top Concerns, Recommendations +7. Validate: verify all numbers match data sources +8. Save to `reports/{{quarter}}-analysis.md` +9. Show the full report to user and notify where file was saved + +## Important + +- **Check if quarter is complete**: Compare today's date to quarter end (Q1: Jan-Mar, Q2: Apr-Jun, Q3: Jul-Sep, Q4: Oct-Dec) +- For partial quarters: add disclaimer in Executive Summary +- **Calculate metrics from CSV data** - don't rely on pre-computed files +- **Read per-role data** if `data/{{quarter}}/galaxy_legacy.csv` exists for top downloaded roles +- **Balanced report**: ~400-500 words, 6 sections (Summary, Metrics, Highlights, Top Roles, Concerns, Recommendations) +- **Auto-save** to `reports/{{quarter}}-analysis.md` (safe to overwrite, tracked in git) +- Show full report to user and file location + +## Metric Calculation Formulas + +Use these formulas when calculating metrics from the raw CSV data: + +**PR Metrics:** +- Merge Rate = (PRs Merged) / (PRs Created - PRs Open) × 100 + - Excludes PRs still under review from the calculation +- External Acceptance = (External PRs Merged) / (External PRs Created - External PRs Open) × 100 + - Excludes external PRs still under review +- External % = (External PRs Created) / (PRs Created) × 100 +- QoQ Growth = ((Current - Previous) / Previous) × 100 + +**Issue Metrics:** +- Resolution Rate = (Issues Closed) / (Issues Created) × 100 +- External % = (External Issues Created) / (Issues Created) × 100 +- QoQ Growth = ((Current - Previous) / Previous) × 100 + +**Galaxy Metrics:** +- Legacy QoQ Growth = ((Current Total - Previous Total) / Previous Total) × 100 +- Collections QoQ Growth = ((Current Total - Previous Total) / Previous Total) × 100 diff --git a/.github/workflows/quarterly-metrics.yml b/.github/workflows/quarterly-metrics.yml new file mode 100644 index 0000000..db48c2b --- /dev/null +++ 
b/.github/workflows/quarterly-metrics.yml @@ -0,0 +1,171 @@ +name: Quarterly Metrics Report + +on: + # Scheduled: Run on the last day of each quarter at 9am UTC + schedule: + - cron: '0 9 31 3,12 *' # March 31, December 31 at 9am UTC + - cron: '0 9 30 6,9 *' # June 30, September 30 at 9am UTC + + # Manual trigger with optional parameters + workflow_dispatch: + inputs: + quarter: + description: 'Quarter (e.g., 2025-Q1)' + required: false + type: string + date_range: + description: 'Date range (e.g., 2025-01-01..2025-03-31)' + required: false + type: string + +jobs: + generate-report: + runs-on: ubuntu-latest + + permissions: + contents: write # Required to commit and push changes + pull-requests: write # Required to create pull requests + + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install Python dependencies + run: | + pip install -r requirements.txt + + - name: Install GitHub CLI + run: | + # gh CLI is pre-installed on ubuntu-latest runners + gh --version + + - name: Determine quarter and date range + id: quarter + env: + INPUT_QUARTER: ${{ github.event.inputs.quarter }} + INPUT_DATE_RANGE: ${{ github.event.inputs.date_range }} + run: | + # Use input if provided, otherwise calculate current quarter + if [ -n "$INPUT_QUARTER" ]; then + QUARTER="$INPUT_QUARTER" + else + # Calculate quarter from current date (%-m = unpadded month; zero-padded "08"/"09" are rejected as invalid octal by bash arithmetic) + YEAR=$(date +%Y) + MONTH=$(date +%-m) + Q=$(( (MONTH - 1) / 3 + 1 )) + QUARTER="${YEAR}-Q${Q}" + fi + echo "quarter=${QUARTER}" >> $GITHUB_OUTPUT + + # Determine date range + if [ -n "$INPUT_DATE_RANGE" ]; then + DATE_RANGE="$INPUT_DATE_RANGE" + else + # Auto-calculate date range based on quarter + YEAR=$(echo ${QUARTER} | cut -d'-' -f1) + Q_NUM=$(echo ${QUARTER} | cut -d'Q' -f2) + + case ${Q_NUM} in + 1) + DATE_RANGE="${YEAR}-01-01..${YEAR}-03-31" + ;; + 2) + DATE_RANGE="${YEAR}-04-01..${YEAR}-06-30" + ;; + 3) + 
DATE_RANGE="${YEAR}-07-01..${YEAR}-09-30" + ;; + 4) + DATE_RANGE="${YEAR}-10-01..${YEAR}-12-31" + ;; + esac + fi + echo "date_range=${DATE_RANGE}" >> $GITHUB_OUTPUT + + - name: Collect GitHub statistics + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + QUARTER: ${{ steps.quarter.outputs.quarter }} + DATE_RANGE: ${{ steps.quarter.outputs.date_range }} + run: bash scripts/collect_all_github_stats.sh + + - name: Collect Galaxy statistics + env: + GALAXY_API_KEY: ${{ secrets.GALAXY_API_KEY }} + QUARTER: ${{ steps.quarter.outputs.quarter }} + run: python3 scripts/collect_galaxy_stats.py + + - name: Update quarterly summary files + env: + QUARTER: ${{ steps.quarter.outputs.quarter }} + run: python3 scripts/update_quarterly_summary.py + + - name: Generate graphs + env: + QUARTER: ${{ steps.quarter.outputs.quarter }} + run: python3 scripts/generate_graphs.py + + - name: Create Pull Request with results + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + QUARTER: ${{ steps.quarter.outputs.quarter }} + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + git add data/ reports/ + + # Check if there are changes to commit + if git diff --staged --quiet; then + echo "No changes to commit" + exit 0 + fi + + # Create or switch to branch for this quarter + BRANCH_NAME="metrics/${QUARTER}" + if git ls-remote --exit-code --heads origin "$BRANCH_NAME" >/dev/null 2>&1; then + echo "Branch $BRANCH_NAME exists, updating it" + git fetch origin "$BRANCH_NAME" + git checkout -B "$BRANCH_NAME" "origin/$BRANCH_NAME" + else + echo "Creating new branch $BRANCH_NAME" + git checkout -b "$BRANCH_NAME" + fi + + # Commit changes + git commit -m "Add metrics for ${QUARTER}" -m "Generated by GitHub Actions workflow" -m "- Data: data/${QUARTER}/" -m "- Graphs: reports/images/" + + # Push the branch + git push --set-upstream origin "$BRANCH_NAME" + + # Create Pull Request if it doesn't exist + PR_URL=$(gh pr list --head 
"$BRANCH_NAME" --base main --json url -q '.[0].url') + if [ -n "$PR_URL" ]; then + echo "PR already exists: $PR_URL" + else + gh pr create \ + --title "Quarterly Metrics - ${QUARTER}" \ + --body "Automated metrics collection for ${QUARTER}. **Data:** \`data/${QUARTER}/\` **Graphs:** \`reports/images/\` After merging, generate analysis: \`/analyze-quarterly-metrics ${QUARTER}\`" \ + --base main \ + --head "$BRANCH_NAME" + fi + + - name: Create workflow summary + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + QUARTER: ${{ steps.quarter.outputs.quarter }} + run: | + echo "## Quarterly Metrics - ${QUARTER}" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + PR_URL=$(gh pr view metrics/${QUARTER} --json url -q .url 2>/dev/null || echo "") + if [ -n "$PR_URL" ]; then + echo "**[Review Pull Request](${PR_URL})**" >> $GITHUB_STEP_SUMMARY + else + echo "Data: \`data/${QUARTER}/\`" >> $GITHUB_STEP_SUMMARY + fi diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..92afa22 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +__pycache__/ +venv/ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5846b60 --- /dev/null +++ b/Makefile @@ -0,0 +1,49 @@ +# Makefile for Linux System Roles Quarterly Metrics +.PHONY: help collect-github collect-galaxy update-summary generate-graphs quarterly-report clean + +# Auto-detect quarter and date range +CURRENT_MONTH := $(shell date +%-m) +CURRENT_YEAR := $(shell date +%Y) +CURRENT_Q := $(shell echo $$((($(CURRENT_MONTH)-1)/3+1))) +QUARTER ?= $(CURRENT_YEAR)-Q$(CURRENT_Q) +YEAR := $(shell echo $(QUARTER) | cut -d'-' -f1) +Q_NUM := $(shell echo $(QUARTER) | cut -d'Q' -f2) + +ifeq ($(Q_NUM),1) + DATE_RANGE := $(YEAR)-01-01..$(YEAR)-03-31 +else ifeq ($(Q_NUM),2) + DATE_RANGE := $(YEAR)-04-01..$(YEAR)-06-30 +else ifeq ($(Q_NUM),3) + DATE_RANGE := $(YEAR)-07-01..$(YEAR)-09-30 +else ifeq ($(Q_NUM),4) + DATE_RANGE := $(YEAR)-10-01..$(YEAR)-12-31 +endif + +help: + @echo "Targets: collect-github, collect-galaxy, 
update-summary, generate-graphs, quarterly-report, clean" + @echo "Current quarter: $(QUARTER) ($(DATE_RANGE))" + @echo "Requires: GITHUB_TOKEN, optionally GALAXY_API_KEY" + +collect-github: + @test -n "$(GITHUB_TOKEN)" || (echo "ERROR: GITHUB_TOKEN not set" && exit 1) + @QUARTER=$(QUARTER) DATE_RANGE=$(DATE_RANGE) bash scripts/collect_all_github_stats.sh + +collect-galaxy: + @QUARTER=$(QUARTER) python3 scripts/collect_galaxy_stats.py + +update-summary: + @QUARTER=$(QUARTER) python3 scripts/update_quarterly_summary.py + +generate-graphs: + @QUARTER=$(QUARTER) python3 scripts/generate_graphs.py + +quarterly-report: collect-github collect-galaxy update-summary generate-graphs + @echo "" + @echo "✅ Complete for $(QUARTER)" + @echo "To generate AI analysis with your AI agent:" + @echo " Use the skill defined in .claude/skills/analyze-quarterly-metrics/" + @echo " (For Claude Code users: /analyze-quarterly-metrics $(QUARTER))" + +clean: + @find . -type f -name "*.pyc" -delete + @find . -type d -name "__pycache__" -delete diff --git a/README.md b/README.md new file mode 100644 index 0000000..3e0853c --- /dev/null +++ b/README.md @@ -0,0 +1,158 @@ +# Linux System Roles - Upstream Metrics + +Automated quarterly metrics reporting for the Linux System Roles project, tracking GitHub activity and Ansible Galaxy downloads. + +## Overview + +Collects metrics on: +- **GitHub Activity**: Pull requests and issues from linux-system-roles repositories +- **Ansible Galaxy**: Download counts for legacy roles and collections + +## Automated Quarterly Collection + +GitHub Actions automatically collects metrics on the **last day of each quarter**: +- **March 31** - Collects Q1 data (Jan 1 - Mar 31) +- **June 30** - Collects Q2 data (Apr 1 - Jun 30) +- **September 30** - Collects Q3 data (Jul 1 - Sep 30) +- **December 31** - Collects Q4 data (Oct 1 - Dec 31) + +The workflow creates a Pull Request with the collected data and graphs. 
+ +### After Data Collection + +Once the PR is merged, generate the quarterly analysis report using the AI skill: + +``` +/analyze-quarterly-metrics 2026-Q2 +``` + +This generates a comprehensive analysis with trends, risks, and recommendations in `reports/2026-Q2-analysis.md`. + +## Local Development + +### Prerequisites + +1. **Python 3.9+**: + ```bash + pip install -r requirements.txt + ``` + +2. **GitHub CLI (`gh`)**: + ```bash + # Install: https://cli.github.com/ + gh auth login + ``` + +3. **Environment Variables**: + ```bash + export GITHUB_TOKEN="ghp_..." # Required: GitHub Personal Access Token + export GALAXY_API_KEY="..." # Optional: Prevents API rate limiting + ``` + + **About GALAXY_API_KEY:** + - **Optional** - data collection works without it + - **Why use it?** Authenticated requests have higher rate limits + - **When needed?** If you hit rate limits (429 errors) during Galaxy API calls + - **How to get it:** Login to galaxy.ansible.com → Profile → API Key + +### Quick Start + +Run the complete workflow for the current quarter: +```bash +make quarterly-report +``` + +Or for a specific quarter: +```bash +make quarterly-report QUARTER=2026-Q2 +``` + +### Makefile Targets + +```bash +make quarterly-report # Full workflow: collect + update + graphs +make collect-github # Collect GitHub PRs/Issues +make collect-galaxy # Collect Galaxy downloads +make update-summary # Update summary CSV files +make generate-graphs # Generate all graphs +make clean # Remove temporary files +``` + +## Data Sources + +### GitHub +- **Organization:** linux-system-roles (all repos except tft-tests, test-harness, auto-maintenance, .github, template) +- **External repo:** willshersystems/ansible-sshd +- **Time-based queries:** Uses `gh pr list -S "created:2026-04-01..2026-06-30"` +- **Excludes:** Bot PRs/issues, `ci:` PRs, `docs(changelog)` PRs, `[citest_skip]` PRs + +**⚠️ Methodology Changes (May 2026):** + +1. 
**Issues Closed Counting:** Prior to May 2026, "Issues Closed" only counted issues that were both created AND closed within the same quarter. Starting May 2026, the metric correctly counts ALL issues closed in the quarter regardless of creation date. + - Historical data (before May 2026): Undercounts closed issues + - Future data (May 2026+): Accurate count of issues closed in quarter + - Comparisons across this boundary are not directly valid + +2. **Bot and Automated PR Exclusions:** Starting May 2026, newly excluded: + - PRs/issues created by bots (using `author.is_bot` field) + - PRs with `[citest_skip]` in the title (automated test-skip PRs) + + *(Note: `ci:` and `docs(changelog)` PRs were already excluded in historical data since 2023-Q3)* + + Historical data (2023-Q3 through 2026-Q1) includes bot and `[citest_skip]` PRs, slightly inflating counts. + +### Ansible Galaxy +- **Legacy roles:** linux-system-roles namespace + willshersystems/sshd +- **Collections:** fedora.linux_system_roles, microsoft.sql +- **Snapshot-based:** Current totals only (no historical queries available) +- **Delta calculation:** Current quarter total minus previous quarter total + +## Graphs Generated + +**Historical (all quarters):** +- `github-prs.png` - PR statistics over time (6 metrics) +- `github-issues.png` - Issue statistics over time (4 metrics) +- `galaxy-legacy-total.png` - Cumulative legacy downloads +- `galaxy-legacy-total-delta.png` - Quarterly delta downloads +- `galaxy-collection-*.png` - Collection downloads per quarter + +**Quarter-specific:** +- `galaxy-legacy-per-role-2026-Q2.png` - Per-role cumulative totals +- `galaxy-legacy-per-role-delta-2026-Q2.png` - Per-role quarterly growth + +## GitHub Actions Workflow + +### Manual Trigger + +1. Go to **Actions** → **Quarterly Metrics Report** +2. Click **Run workflow** +3. Optionally override quarter and date range +4. Click **Run workflow** + +The workflow creates a Pull Request with the collected data. 
+ +### Required Secrets + +- `GITHUB_TOKEN` - Automatically provided by GitHub Actions +- `GALAXY_API_KEY` - Optional, prevents rate limiting + +## Troubleshooting + +### GitHub Authentication +```bash +gh auth status # Check authentication +gh auth login # Re-authenticate +``` + +### Rate Limiting +If you see "rate limited" errors from Galaxy API: +```bash +export GALAXY_API_KEY="your-api-key" +``` + +### Network Timeouts +Retry logic handles most timeouts automatically (3 attempts with exponential backoff). If issues persist, check GitHub/Galaxy API status. + +## License + +See [LICENSE](LICENSE) file. diff --git a/data/2026-Q2/galaxy_collections.csv b/data/2026-Q2/galaxy_collections.csv new file mode 100644 index 0000000..26ba816 --- /dev/null +++ b/data/2026-Q2/galaxy_collections.csv @@ -0,0 +1,3 @@ +namespace,name,full_name,download_count +fedora,linux_system_roles,fedora.linux_system_roles,2572336 +microsoft,sql,microsoft.sql,112578 diff --git a/data/2026-Q2/galaxy_legacy.csv b/data/2026-Q2/galaxy_legacy.csv new file mode 100644 index 0000000..4eadd93 --- /dev/null +++ b/data/2026-Q2/galaxy_legacy.csv @@ -0,0 +1,40 @@ +name,download_count +ad_integration,10329 +aide,161 +bootloader,173621 +certificate,20473 +cockpit,210674 +crypto_policies,142654 +fapolicyd,1767 +firewall,90725 +gfs2,86 +ha_cluster,3100 +hpc,31 +image_builder,4304 +journald,188266 +kdump,49580 +kernel_settings,8456 +keylime_server,82 +logging,4363 +metrics,4881 +nbde_client,30683 +nbde_server,27942 +network,346387 +pam_pwd,6332 +podman,11446 +postfix,146101 +postgresql,8246 +rhc,26508 +selinux,191690 +snapshot,90 +ssh,4371 +sshd,650985 +storage,112064 +sudo,1125 +systemd,15566 +timesync,920898 +tlog,2485 +trustee_client,0 +trustee_server,0 +tuned,22772 +vpn,1634 diff --git a/data/2026-Q2/issues.csv b/data/2026-Q2/issues.csv new file mode 100644 index 0000000..2ab5724 --- /dev/null +++ b/data/2026-Q2/issues.csv @@ -0,0 +1,54 @@ +Role,Issues Created,Issues Closed,Created non-maint,Closed 
non-maint +ad_integration,0,1,0,1 +aide,0,0,0,0 +auditd,0,0,0,0 +bootloader,0,0,0,0 +certificate,0,0,0,0 +cockpit,0,0,0,0 +crypto_policies,0,0,0,0 +fapolicyd,0,0,0,0 +firewall,1,0,1,0 +gfs2,0,0,0,0 +ha_cluster,2,0,2,0 +journald,1,0,1,0 +kdump,0,0,0,0 +kernel_settings,0,0,0,0 +keylime_server,0,0,0,0 +logging,0,0,0,0 +metrics,0,0,0,0 +nbde_server,0,0,0,0 +network,0,0,0,0 +podman,1,0,1,0 +postfix,0,0,0,0 +postgresql,0,0,0,0 +rhc,0,0,0,0 +selinux,1,0,1,0 +snapshot,0,0,0,0 +ssh,0,0,0,0 +storage,0,0,0,0 +sudo,0,0,0,0 +systemd,0,0,0,0 +timesync,0,0,0,0 +tlog,0,0,0,0 +trustee_client,0,0,0,0 +trustee_server,0,0,0,0 +vpn,0,0,0,0 +hpc,0,0,0,0 +ee_linux_system_roles,0,0,0,0 +ee_linux_automation,0,0,0,0 +tox-lsr,0,0,0,0 +pam_pwd,0,0,0,0 +nbde_client,1,0,1,0 +mssql,0,0,0,0 +ci-testing,0,0,0,0 +lsr-woke-action,0,0,0,0 +tuned,0,0,0,0 +sap-base-settings,0,0,0,0 +image_builder,0,0,0,0 +sap-preconfigure,0,0,0,0 +sap-netweaver-preconfigure,0,0,0,0 +sap-hana-preconfigure,0,0,0,0 +lsr-gh-action-py26,0,0,0,0 +meta_test,0,0,0,0 +experimental-azure-firstboot,0,0,0,0 +ansible-sshd,0,1,0,1 diff --git a/data/2026-Q2/prs.csv b/data/2026-Q2/prs.csv new file mode 100644 index 0000000..17b77c6 --- /dev/null +++ b/data/2026-Q2/prs.csv @@ -0,0 +1,54 @@ +Role,PRs Created,PRs Merged,PRs open,Created non-maint,Merged non-maint,Open non-maint +ad_integration,4,3,1,0,0,0 +aide,2,2,0,0,0,0 +auditd,3,2,0,0,0,0 +bootloader,2,2,0,0,0,0 +certificate,2,2,0,0,0,0 +cockpit,1,1,0,0,0,0 +crypto_policies,1,1,0,0,0,0 +fapolicyd,1,1,0,0,0,0 +firewall,3,3,0,0,0,0 +gfs2,1,1,0,0,0,0 +ha_cluster,9,8,0,5,5,0 +journald,2,1,1,1,0,1 +kdump,1,1,0,0,0,0 +kernel_settings,2,2,0,0,0,0 +keylime_server,1,1,0,0,0,0 +logging,1,1,0,0,0,0 +metrics,2,2,0,0,0,0 +nbde_server,3,3,0,0,0,0 +network,7,7,0,1,1,0 +podman,5,5,0,0,0,0 +postfix,2,2,0,0,0,0 +postgresql,3,3,0,0,0,0 +rhc,4,4,0,0,0,0 +selinux,2,2,0,0,0,0 +snapshot,5,5,0,0,0,0 +ssh,1,1,0,0,0,0 +storage,5,5,0,0,0,0 +sudo,2,2,0,1,1,0 +systemd,1,1,0,0,0,0 +timesync,2,2,0,0,0,0 
+tlog,2,2,0,0,0,0 +trustee_client,6,6,0,0,0,0 +trustee_server,6,6,0,0,0,0 +vpn,2,2,0,0,0,0 +hpc,13,11,0,2,1,0 +ee_linux_system_roles,0,0,0,0,0,0 +ee_linux_automation,0,0,0,0,0,0 +tox-lsr,4,4,0,0,0,0 +pam_pwd,1,1,0,0,0,0 +nbde_client,4,4,0,0,0,0 +mssql,6,3,1,2,0,1 +ci-testing,0,0,0,0,0,0 +lsr-woke-action,0,0,0,0,0,0 +tuned,0,0,0,0,0,0 +sap-base-settings,0,0,0,0,0,0 +image_builder,0,0,0,0,0,0 +sap-preconfigure,0,0,0,0,0,0 +sap-netweaver-preconfigure,0,0,0,0,0,0 +sap-hana-preconfigure,0,0,0,0,0,0 +lsr-gh-action-py26,0,0,0,0,0,0 +meta_test,0,0,0,0,0,0 +experimental-azure-firstboot,0,0,0,0,0,0 +ansible-sshd,4,3,0,4,3,0 diff --git a/data/README.md b/data/README.md new file mode 100644 index 0000000..711d229 --- /dev/null +++ b/data/README.md @@ -0,0 +1,111 @@ +# Data Directory Structure + +This directory contains historical metrics data and quarterly summaries. + +## Directory Structure + +``` +data/ +├── YYYY-QN/ # Quarter-specific detailed data +│ ├── prs.csv # Per-repo PR statistics +│ ├── issues.csv # Per-repo issue statistics +│ ├── galaxy_legacy.csv # Per-role legacy downloads (cumulative) +│ └── galaxy_collections.csv # Per-collection downloads (cumulative) +│ +├── github_prs_summary.csv # Historical PR totals by quarter +├── github_issues_summary.csv # Historical issue totals by quarter +├── galaxy_legacy_summary.csv # Historical legacy downloads (cumulative totals) +├── galaxy_collections_summary.csv # Historical collection downloads (quarterly deltas) +└── galaxy_collections_cumulative.csv # Cumulative tracking for delta calculation +``` + +## File Descriptions + +### Quarter-Specific Data (data/YYYY-QN/) + +Detailed per-repository/per-role data collected each quarter. 
+ +**prs.csv** - GitHub pull request statistics per repository +- Columns: Role, PRs Created, PRs Merged, PRs open, Created non-maint, Merged non-maint, Open non-maint + +**issues.csv** - GitHub issue statistics per repository +- Columns: Role, Issues Created, Issues Closed, Created non-maint, Closed non-maint + +**galaxy_legacy.csv** - Ansible Galaxy legacy role downloads +- Columns: name, download_count +- Note: download_count is CUMULATIVE (total downloads since role creation) + +**galaxy_collections.csv** - Ansible Galaxy collection downloads +- Columns: namespace, name, full_name, download_count +- Note: download_count is CUMULATIVE (total downloads since collection creation) + +### Summary Files (Historical Aggregates) + +**github_prs_summary.csv** - Aggregated PR statistics by quarter +- Columns: Quarter, PRs Created, PRs Merged, PRs Open, External PRs Created, External PRs Merged, External PRs Open +- Values: Sum of all repositories for that quarter + +**github_issues_summary.csv** - Aggregated issue statistics by quarter +- Columns: Quarter, Issues Created, Issues Closed, External Issues Created, External Issues Closed +- Values: Sum of all repositories for that quarter + +**galaxy_legacy_summary.csv** - Total legacy role downloads by quarter +- Columns: Quarter, Total Downloads +- Values: CUMULATIVE total (sum of all roles' cumulative downloads) +- Use for tracking overall growth trend + +**galaxy_collections_summary.csv** - Collection downloads per quarter +- Columns: Quarter, fedora.linux_system_roles, microsoft.sql, Total Downloads +- Values: QUARTERLY DELTAS (new downloads in that quarter, not cumulative) +- Calculated by subtracting previous quarter's cumulative from current quarter's cumulative + +**galaxy_collections_cumulative.csv** - Cumulative tracking (internal use) +- Columns: Quarter, fedora.linux_system_roles, microsoft.sql +- Values: CUMULATIVE totals at end of each quarter +- Used by update_quarterly_summary.py to calculate deltas +- 
Automatically updated when running quarterly workflow + +## Workflow + +When you run `make quarterly-report QUARTER=YYYY-QN`: + +1. **Data Collection:** + - Collects detailed data → saves to `data/YYYY-QN/*.csv` + - These files contain per-repo/per-role details + +2. **Summary Update:** + - Runs `update_quarterly_summary.py` + - Aggregates detailed data into totals + - For GitHub: simple sum of all repos + - For Galaxy Collections: calculates delta from cumulative tracking + - Updates the 4 summary CSV files + +3. **Graph Generation:** + - Reads from summary CSV files + - Generates graphs showing historical trends + - Saves graphs to `reports/images/` + +4. **Report Generation (separate step, not run by `make quarterly-report`; use the `analyze-quarterly-metrics` skill):** + - Reads from summary CSVs and current quarter data + - Generates Markdown report with embedded graphs + - Saves to `reports/YYYY-QN-analysis.md` + +## Adding Historical Data + +To add historical data: + +1. **GitHub PRs/Issues:** Add rows to `github_prs_summary.csv` and `github_issues_summary.csv` + +2. **Galaxy Legacy:** Add rows to `galaxy_legacy_summary.csv` with cumulative totals + +3. 
**Galaxy Collections:** + - Add cumulative totals to `galaxy_collections_cumulative.csv` + - Add quarterly deltas to `galaxy_collections_summary.csv` + - Calculate deltas: current_quarter_cumulative - previous_quarter_cumulative + +## Notes + +- Quarter format: `YYYY-QN` (e.g., `2024-Q4`) +- All CSVs use standard CSV format with headers +- Summary files are sorted by quarter (oldest to newest) +- The system automatically maintains consistency between files diff --git a/data/galaxy_collections_cumulative.csv b/data/galaxy_collections_cumulative.csv new file mode 100644 index 0000000..27c735e --- /dev/null +++ b/data/galaxy_collections_cumulative.csv @@ -0,0 +1,10 @@ +Quarter,fedora.linux_system_roles,microsoft.sql +2024-Q2,581950,43048 +2024-Q3,760466,47644 +2024-Q4,944286,50594 +2025-Q1,1191513,57856 +2025-Q2,1489194,64665 +2025-Q3,1841521,72223 +2025-Q4,2155471,90242 +2026-Q1,2500611,102551 +2026-Q2,2572336,112578 diff --git a/data/galaxy_collections_summary.csv b/data/galaxy_collections_summary.csv new file mode 100644 index 0000000..cebbf98 --- /dev/null +++ b/data/galaxy_collections_summary.csv @@ -0,0 +1,9 @@ +Quarter,fedora.linux_system_roles,microsoft.sql,Total Downloads +2024-Q3,178516,4596,183112 +2024-Q4,183820,2950,186770 +2025-Q1,247227,7262,254489 +2025-Q2,297681,6809,304490 +2025-Q3,352327,7558,359885 +2025-Q4,313950,18019,331969 +2026-Q1,369140,20309,389449 +2026-Q2,71725,10027,81752 diff --git a/data/galaxy_legacy_per_role_history.csv b/data/galaxy_legacy_per_role_history.csv new file mode 100644 index 0000000..6f131d8 --- /dev/null +++ b/data/galaxy_legacy_per_role_history.csv @@ -0,0 +1,40 @@ +Role,2024-Q2,2024-Q3,2024-Q4,2025-Q1,2025-Q2,2025-Q3,2025-Q4,2026-Q1,2026-Q2 +ad_integration,1939.0,2057.0,2186.0,2973.0,3833.0,5679.0,6635.0,9364.0,10329 +aide,,,10.0,20.0,51.0,57.0,117.0,151.0,161 +bootloader,13107.0,33376.0,54054.0,83028.0,100856.0,134197.0,148269.0,169124.0,173621 
+certificate,2802.0,4604.0,5373.0,7629.0,10068.0,13149.0,16073.0,19776.0,20473 +cockpit,119964.0,140254.0,159504.0,170083.0,176608.0,187289.0,197714.0,209019.0,210674 +crypto_policies,55355.0,63712.0,72739.0,82589.0,93245.0,108984.0,123909.0,140400.0,142654 +fapolicyd,15.0,22.0,30.0,381.0,830.0,1317.0,1465.0,1693.0,1767 +firewall,59228.0,62329.0,63999.0,66606.0,68855.0,74352.0,79973.0,89187.0,90725 +gfs2,9.0,13.0,21.0,30.0,43.0,45.0,52.0,86.0,86 +ha_cluster,1701.0,1854.0,1906.0,2136.0,2290.0,2583.0,2820.0,3080.0,3100 +hpc,,,,,,2.0,9.0,31.0,31 +image_builder,2889.0,2896.0,2905.0,3031.0,3094.0,3141.0,3931.0,4190.0,4304 +journald,16429.0,37060.0,58077.0,87360.0,105400.0,140491.0,157354.0,183325.0,188266 +kdump,36385.0,40156.0,42162.0,44064.0,45337.0,46114.0,47320.0,49360.0,49580 +kernel_settings,4755.0,4998.0,5209.0,5761.0,6270.0,6717.0,7340.0,8263.0,8456 +keylime_server,15.0,19.0,26.0,33.0,41.0,48.0,55.0,82.0,82 +logging,1447.0,1507.0,1772.0,2146.0,2311.0,2523.0,3545.0,4155.0,4363 +metrics,3322.0,3387.0,3408.0,3540.0,3609.0,3679.0,4484.0,4764.0,4881 +nbde_client,7414.0,9274.0,9876.0,10518.0,11030.0,13631.0,18929.0,29359.0,30683 +nbde_server,5296.0,7139.0,7738.0,8365.0,8861.0,11389.0,16481.0,26664.0,27942 +network,229496.0,241486.0,251104.0,263428.0,276585.0,298601.0,319754.0,343090.0,346387 +pam_pwd,59.0,73.0,503.0,836.0,1491.0,3360.0,4979.0,6037.0,6332 +podman,1491.0,1611.0,1900.0,2213.0,2637.0,4655.0,7582.0,11134.0,11446 +postfix,115836.0,116962.0,118752.0,122501.0,125337.0,130739.0,136279.0,144807.0,146101 +postgresql,3345.0,4109.0,4776.0,5400.0,6065.0,6864.0,7491.0,8149.0,8246 +rhc,1468.0,2179.0,2972.0,4998.0,7309.0,12289.0,17093.0,25442.0,26508 +selinux,131885.0,139481.0,144182.0,153520.0,159901.0,170548.0,179788.0,189789.0,191690 +snapshot,3.0,8.0,15.0,22.0,31.0,48.0,58.0,90.0,90 +ssh,1381.0,1401.0,1428.0,1581.0,1730.0,2224.0,3461.0,4209.0,4371 +sshd,148997.0,204841.0,260198.0,322480.0,381876.0,464483.0,546739.0,640123.0,650985 
+storage,78617.0,85352.0,90633.0,96214.0,100126.0,104374.0,108109.0,111580.0,112064 +sudo,46.0,105.0,153.0,170.0,215.0,264.0,709.0,1035.0,1125 +systemd,107.0,454.0,768.0,1829.0,2970.0,6674.0,9902.0,14945.0,15566 +timesync,562478.0,620621.0,679959.0,742285.0,784023.0,844359.0,883525.0,913662.0,920898 +tlog,2360.0,2397.0,2406.0,2414.0,2436.0,2446.0,2456.0,2485.0,2485 +trustee_client,,,,,,,,,0 +trustee_server,,,,,,,,,0 +tuned,19294.0,20035.0,20417.0,20694.0,20986.0,21224.0,22081.0,22564.0,22772 +vpn,177.0,184.0,197.0,327.0,373.0,428.0,1228.0,1515.0,1634 diff --git a/data/galaxy_legacy_summary.csv b/data/galaxy_legacy_summary.csv new file mode 100644 index 0000000..faaf08a --- /dev/null +++ b/data/galaxy_legacy_summary.csv @@ -0,0 +1,10 @@ +Quarter,Total Downloads +2024-Q2,1628935 +2024-Q3,1855956 +2024-Q4,2071358 +2025-Q1,2321205 +2025-Q2,2516723 +2025-Q3,2828967 +2025-Q4,3087709 +2026-Q1,3353729 +2026-Q2,3440878 diff --git a/data/github_issues_summary.csv b/data/github_issues_summary.csv new file mode 100644 index 0000000..61f09a1 --- /dev/null +++ b/data/github_issues_summary.csv @@ -0,0 +1,13 @@ +Quarter,Issues Created,Issues Closed,External Issues Created,External Issues Closed +2023-Q3,17,12,15,10 +2023-Q4,21,11,15,10 +2024-Q1,44,16,22,11 +2024-Q2,16,5,11,4 +2024-Q3,14,3,10,2 +2024-Q4,15,8,10,6 +2025-Q1,10,4,10,4 +2025-Q2,22,9,16,6 +2025-Q3,25,14,21,13 +2025-Q4,12,6,12,6 +2026-Q1,14,3,13,3 +2026-Q2,7,2,7,2 diff --git a/data/github_prs_summary.csv b/data/github_prs_summary.csv new file mode 100644 index 0000000..2ea6386 --- /dev/null +++ b/data/github_prs_summary.csv @@ -0,0 +1,13 @@ +Quarter,PRs Created,PRs Merged,PRs Open,External PRs Created,External PRs Merged,External PRs Open +2023-Q3,124,118,0,5,4,0 +2023-Q4,190,180,2,46,42,1 +2024-Q1,94,86,3,18,17,1 +2024-Q2,88,78,3,9,5,2 +2024-Q3,130,81,6,15,10,3 +2024-Q4,116,77,0,4,4,0 +2025-Q1,59,44,3,12,5,2 +2025-Q2,142,116,12,16,12,3 +2025-Q3,110,93,5,26,15,2 +2025-Q4,69,66,2,18,16,1 +2026-Q1,175,161,1,22,20,1 
+2026-Q2,128,118,3,16,11,2 diff --git a/reports/2026-Q2-analysis.md b/reports/2026-Q2-analysis.md new file mode 100644 index 0000000..88dc50b --- /dev/null +++ b/reports/2026-Q2-analysis.md @@ -0,0 +1,96 @@ +# Linux System Roles - 2026-Q2 Quarterly Metrics Analysis + +**Report Date:** May 20, 2026 +**Status:** ⚠️ Partial quarter data (~55% complete through May 20) + +--- + +## Executive Summary + +Q2 2026 demonstrates strong PR efficiency (94.4% merge rate, up from 92.5%) and healthy PR volume growth (projecting +33% QoQ), but faces two critical challenges: Galaxy downloads declining sharply across both legacy roles (-40% projected) and collections (-62% projected), and external PR acceptance dropped 16.6 percentage points to 78.6%. + +--- + +## Key Metrics + +| Metric | Q2 2026 (Partial) | Q2 Projected | Q1 2026 | Change | +|--------|-------------------|--------------|---------|--------| +| **PRs Created** | 128 | ~233 | 175 | +33% ✓ | +| **PRs Merged** | 118 | ~215 | 161 | +33% ✓ | +| **PR Merge Rate** | 94.4% | ~94% | 92.5% | +1.9pp ✓ | +| **External PRs Created** | 16 | ~29 | 22 | +32% ✓ | +| **External Acceptance** | 78.6% | ~78% | 95.2% | **-16.6pp** ⚠️ | +| **External % of Total** | 12.5% | ~12.5% | 12.6% | -0.1pp | +| **Issues Created** | 7 | ~13 | 14 | -7% | +| **Issues Closed** | 2 | ~4 | 3 | +33% ✓ | +| **Issue Resolution Rate** | 28.6% | ~31% | 21.4% | +7.2pp ✓ | +| **Galaxy Legacy Downloads** | 87K | ~158K | 266K | **-40%** ⚠️ | +| **Galaxy Collections** | 82K | ~149K | 389K | **-62%** ⚠️ | + +--- + +## Highlights + +- **PR volume trending up strongly**: Projecting 233 PRs for full quarter, a 33% increase over Q1's 175 PRs—on track for highest quarterly PR count in recent history +- **Excellent merge efficiency**: 94.4% merge rate shows team is processing contributions effectively, up from 92.5% in Q1 +- **Issue resolution improving**: Resolution rate increased to 28.6% from 21.4% in Q1, showing better responsiveness to user-reported issues +- **Strong 
external engagement**: External PRs account for 12.5% of all PRs, and 100% of issues are externally reported, demonstrating active community participation + +--- + +## Top Downloaded Roles + +Based on cumulative downloads as of May 20, 2026: + +1. **timesync**: 920,898 downloads +2. **sshd**: 650,985 downloads +3. **network**: 346,387 downloads +4. **cockpit**: 210,674 downloads +5. **journald**: 188,266 downloads + +*Fastest growing so far in Q2 (cumulative totals vs. 2026-Q1 in `data/galaxy_legacy_per_role_history.csv`): sshd (+10,862), timesync (+7,236), journald (+4,941), bootloader (+4,497), network (+3,297).* + +--- + +## Top Concerns + +1. **Galaxy downloads collapsing across both platforms**: Legacy roles are tracking toward a -40% QoQ decline (266K → ~158K projected), with the daily download rate dropping from 2,956/day to 1,743/day. Collections face an even steeper -62% decline (389K → ~149K projected). This is too significant to be a data artifact—requires immediate investigation into potential causes: Ansible Galaxy API changes, distribution packaging updates, emerging competitor tools, or user migration patterns. + +2. **External PR acceptance rate dropped significantly**: Only 78.6% of external PRs merged (excluding those still under review) compared to 95.2% in Q1, a 16.6 percentage point decline. Of the 16 external PRs created, 11 were merged, 2 are still open, and 3 were closed without merging, suggesting either declining PR quality, unclear contribution guidelines, or stricter review standards. This could discourage future community contributions. + +3. **Low issue creation rate**: Only 7 issues created (projecting ~13 for full quarter vs 14 in Q1). While not an immediate crisis, this could indicate users aren't reporting problems, have found alternative channels, or are moving away from the project. Worth investigating whether issue reporting barriers exist. + +4. **External contribution funnel at risk**: Combined effect of lower external PR acceptance (78.6%) and low issue reporting creates risk to community health. Need to ensure external contributors feel supported and valued. + +--- + +## Recommendations + +1.
**Immediate (next 2 weeks)**: Launch investigation into Galaxy download decline root cause: + - Check Ansible Galaxy service status, API changes, or policy updates + - Review distribution packaging for Fedora/RHEL—any changes to role delivery methods? + - Analyze download patterns by role to identify if specific roles are affected + - Contact major known users to understand consumption pattern changes + - Check competitor landscape for new alternative role sources + +2. **Short-term (by end of Q2)**: Improve external contributor experience to reverse acceptance rate decline: + - Review the 5 unmerged external PRs to identify common patterns (test failures, style issues, unclear requirements) + - Update CONTRIBUTING.md with specific examples, requirements, and common pitfalls + - Consider adding PR template with pre-submission checklist + - Set up weekly download monitoring dashboard with alert thresholds + +3. **Ongoing**: Monitor and strengthen community health metrics: + - Track external contribution funnel: PRs created → merged → acceptance rate (target: return to >90%) + - Monitor download trends by individual role to identify which are most affected + - Reach out to users to understand issue reporting patterns and barriers + - Set quarterly goal for issue resolution rate (target: >50% by Q3) + +4. 
**Follow-up**: Based on Galaxy investigation findings, develop recovery strategy: + - If technical issues: work with Ansible Galaxy team to resolve + - If distribution changes: collaborate with distros to improve discoverability + - If user migration: understand why and adapt project direction + - If competitor emergence: assess feature gaps and competitive positioning + +--- + +**📄 Report saved to:** `reports/2026-Q2-analysis.md` diff --git a/reports/images/galaxy-collection-fedora-linux_system_roles.png b/reports/images/galaxy-collection-fedora-linux_system_roles.png new file mode 100644 index 0000000..c84f613 Binary files /dev/null and b/reports/images/galaxy-collection-fedora-linux_system_roles.png differ diff --git a/reports/images/galaxy-collection-microsoft-sql.png b/reports/images/galaxy-collection-microsoft-sql.png new file mode 100644 index 0000000..9d59904 Binary files /dev/null and b/reports/images/galaxy-collection-microsoft-sql.png differ diff --git a/reports/images/galaxy-legacy-per-role-2026-Q2.png b/reports/images/galaxy-legacy-per-role-2026-Q2.png new file mode 100644 index 0000000..4162f39 Binary files /dev/null and b/reports/images/galaxy-legacy-per-role-2026-Q2.png differ diff --git a/reports/images/galaxy-legacy-per-role-delta-2026-Q2.png b/reports/images/galaxy-legacy-per-role-delta-2026-Q2.png new file mode 100644 index 0000000..00cb569 Binary files /dev/null and b/reports/images/galaxy-legacy-per-role-delta-2026-Q2.png differ diff --git a/reports/images/galaxy-legacy-total-delta.png b/reports/images/galaxy-legacy-total-delta.png new file mode 100644 index 0000000..e41f443 Binary files /dev/null and b/reports/images/galaxy-legacy-total-delta.png differ diff --git a/reports/images/galaxy-legacy-total.png b/reports/images/galaxy-legacy-total.png new file mode 100644 index 0000000..291abae Binary files /dev/null and b/reports/images/galaxy-legacy-total.png differ diff --git a/reports/images/github-issues.png b/reports/images/github-issues.png 
new file mode 100644 index 0000000..a659818 Binary files /dev/null and b/reports/images/github-issues.png differ diff --git a/reports/images/github-prs.png b/reports/images/github-prs.png new file mode 100644 index 0000000..d1d5c0c Binary files /dev/null and b/reports/images/github-prs.png differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4dcc986 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +# Data visualization +matplotlib>=3.7.0 +seaborn>=0.12.0 + +# Data processing +pandas>=2.0.0 + +# Web scraping for Galaxy collections +beautifulsoup4>=4.12.0 +requests>=2.31.0 +lxml>=4.9.0 diff --git a/scripts/collect_all_github_stats.sh b/scripts/collect_all_github_stats.sh new file mode 100755 index 0000000..2643b27 --- /dev/null +++ b/scripts/collect_all_github_stats.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Wrapper script to collect GitHub stats from multiple organizations +# Reads config.yaml and collects stats for each org, combining into single CSV files + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(dirname "$SCRIPT_DIR")" + +# Check required environment variables +if [ -z "$QUARTER" ]; then + echo "ERROR: QUARTER environment variable not set" + exit 1 +fi + +if [ -z "$DATE_RANGE" ]; then + echo "ERROR: DATE_RANGE environment variable not set" + exit 1 +fi + +DATA_DIR="$ROOT_DIR/data/$QUARTER" +mkdir -p "$DATA_DIR" + +PRS_CSV="$DATA_DIR/prs.csv" +ISSUES_CSV="$DATA_DIR/issues.csv" + +# Initialize CSV files with headers +echo "Role,PRs Created,PRs Merged,PRs open,Created non-maint,Merged non-maint,Open non-maint" > "$PRS_CSV" +echo "Role,Issues Created,Issues Closed,Created non-maint,Closed non-maint" > "$ISSUES_CSV" + +echo "Collecting GitHub statistics for $QUARTER ($DATE_RANGE)..." 
+echo "" + +# Organization 1: linux-system-roles (all repos except excluded) +echo "================================================" +echo "Collecting from linux-system-roles organization" +echo "================================================" + +# Get all repos from linux-system-roles org +REPOS=$(gh repo list linux-system-roles -L 100 --json name -q '.[].name') + +# Exclusion list from config.yaml +EXCLUDE_REPOS="tft-tests test-harness auto-maintenance linux-system-roles.github.io .github template linux-system-roles-upstream-metrics" + +for repo in $REPOS; do + # Check if repo is in exclusion list + skip=false + for excluded in $EXCLUDE_REPOS; do + if [ "$repo" = "$excluded" ]; then + skip=true + break + fi + done + + if [ "$skip" = true ]; then + continue + fi + + echo " $repo" + + # Call collect_github_stats.sh for this repo + upstream_org=linux-system-roles \ + repo="$repo" \ + DATE_RANGE="$DATE_RANGE" \ + PRS_CSVFILE="$PRS_CSV" \ + ISSUES_CSVFILE="$ISSUES_CSV" \ + "$SCRIPT_DIR/collect_github_stats.sh" +done + +echo "" +echo "✓ linux-system-roles data collected" +echo "" + +# Organization 2: willshersystems/ansible-sshd +echo "================================================" +echo "Collecting from willshersystems/ansible-sshd" +echo "================================================" + +upstream_org=willshersystems \ +repo=ansible-sshd \ +DATE_RANGE="$DATE_RANGE" \ +PRS_CSVFILE="$PRS_CSV" \ +ISSUES_CSVFILE="$ISSUES_CSV" \ +"$SCRIPT_DIR/collect_github_stats.sh" + +echo "" +echo "✓ willshersystems/ansible-sshd data collected" +echo "" + +# Summary +echo "================================================" +echo "✅ GitHub statistics collection complete" +echo "================================================" +echo "PRs CSV: $PRS_CSV" +echo "Issues CSV: $ISSUES_CSV" +echo "" +echo "Total repositories:" +wc -l < "$PRS_CSV" | xargs echo -n +echo " entries (including header)" diff --git a/scripts/collect_galaxy_stats.py b/scripts/collect_galaxy_stats.py new file 
mode 100755 index 0000000..3a14637 --- /dev/null +++ b/scripts/collect_galaxy_stats.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python3 +""" +Collect Ansible Galaxy statistics for Linux System Roles. + +This script collects: +1. Legacy roles download counts via Galaxy API v1 +2. Collection download counts via Galaxy API v3 + +Output: +- data/{quarter}/galaxy_legacy.csv +- data/{quarter}/galaxy_collections.csv +""" + +import os +import sys +import csv +import time +import requests +from pathlib import Path + +SCRIPT_DIR = Path(__file__).parent +ROOT_DIR = SCRIPT_DIR.parent + +# Configuration +LEGACY_API_URL = "https://galaxy.ansible.com/api/v1/roles" +LEGACY_NAMESPACE = "linux-system-roles" +LEGACY_PAGE_SIZE = 50 +LEGACY_EXCLUDE = ["template", "mssql"] +LEGACY_ADDITIONAL_ROLES = [ + {"namespace": "willshersystems", "name": "sshd"} +] + +COLLECTIONS = [ + {"namespace": "fedora", "name": "linux_system_roles"}, + {"namespace": "microsoft", "name": "sql"} +] + + +def retry_request(url, headers=None, params=None, max_attempts=3): + """Make HTTP request with retry logic for transient failures""" + delay = 5 + for attempt in range(1, max_attempts + 1): + try: + response = requests.get(url, headers=headers, params=params, timeout=30) + response.raise_for_status() + return response + except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: + if attempt < max_attempts: + print(f" Request failed (attempt {attempt}/{max_attempts}), retrying in {delay}s...") + time.sleep(delay) + delay *= 2 + else: + raise + except requests.exceptions.HTTPError as e: + # Don't retry on HTTP errors (4xx, 5xx) - they're not transient + raise + + +def collect_legacy_roles(api_key): + """Collect download counts for legacy individual roles via API""" + print("Collecting Galaxy legacy roles statistics...") + + api_url = LEGACY_API_URL + namespace = LEGACY_NAMESPACE + page_size = LEGACY_PAGE_SIZE + exclude = LEGACY_EXCLUDE + + # Prepare API request + headers = { + 'accept': 
'application/json', + } + + if api_key: + headers['Authorization'] = f'Token {api_key}' + + params = { + 'namespace': namespace, + 'page_size': page_size + } + + # Fetch ALL roles from API (handle pagination) + all_roles = [] + page = 1 + while True: + params['page'] = page + response = retry_request(api_url, headers=headers, params=params) + + data = response.json() + roles = data.get('results', []) + + if not roles: + break + + all_roles.extend(roles) + + # Check if there's a next page + if not data.get('next'): + break + + page += 1 + print(f" Fetching page {page}...") + + print(f" Total roles found: {len(all_roles)}") + + # Filter and collect role data + role_data = [] + for role in all_roles: + role_name = role['name'] + + # Skip excluded roles + if role_name in exclude: + print(f" Skipping excluded role: {role_name}") + continue + + download_count = role.get('download_count', 0) + role_data.append({ + 'name': role_name, + 'download_count': download_count + }) + print(f" {role_name}: {download_count:,} downloads") + + # Collect additional roles from other namespaces + additional_roles = LEGACY_ADDITIONAL_ROLES + if additional_roles: + print(f"\nCollecting additional roles from other namespaces...") + for additional in additional_roles: + add_namespace = additional['namespace'] + add_name = additional['name'] + + # Fetch this specific role using query parameters + try: + add_params = { + 'namespace': add_namespace, + 'name': add_name + } + response = retry_request(api_url, headers, add_params) + data = response.json() + + results = data.get('results', []) + if results: + role_info = results[0] + download_count = role_info.get('download_count', 0) + role_data.append({ + 'name': add_name, + 'download_count': download_count + }) + print(f" {add_namespace}/{add_name}: {download_count:,} downloads") + else: + print(f" WARNING: {add_namespace}/{add_name} not found") + except Exception as e: + print(f" ERROR fetching {add_namespace}/{add_name}: {e}") + + # Sort by 
name + role_data.sort(key=lambda x: x['name']) + + return role_data + + +def get_collection_downloads_from_api(namespace, name): + """Get download count from Galaxy API v3""" + api_url = f"https://galaxy.ansible.com/api/v3/plugin/ansible/content/published/collections/index/{namespace}/{name}/" + print(f" Fetching from API: {namespace}.{name}") + + try: + response = retry_request(api_url) + + data = response.json() + download_count = data.get('download_count', 0) + + print(f" Found: {download_count:,} downloads") + return download_count + + except requests.exceptions.HTTPError as e: + if e.response.status_code == 404: + raise RuntimeError(f"Collection not found: {namespace}.{name} (404)") from e + else: + raise RuntimeError(f"Failed to fetch {namespace}.{name}: HTTP {e.response.status_code}") from e + except Exception as e: + raise RuntimeError(f"Failed to fetch {namespace}.{name}: {e}") from e + + +def collect_collections(): + """Collect download counts for collections via Galaxy API""" + print("Collecting Galaxy collections statistics...") + + collections = COLLECTIONS + + collection_data = [] + for collection in collections: + namespace = collection['namespace'] + name = collection['name'] + + print(f" Collection: {namespace}.{name}") + download_count = get_collection_downloads_from_api(namespace, name) + + collection_data.append({ + 'namespace': namespace, + 'name': name, + 'full_name': f"{namespace}.{name}", + 'download_count': download_count + }) + + return collection_data + + +def write_csv(filepath, data, fieldnames): + """Write data to CSV file""" + filepath.parent.mkdir(parents=True, exist_ok=True) + + with open(filepath, 'w', newline='') as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(data) + + print(f"Wrote {len(data)} rows to {filepath}") + + +def main(): + # Get quarter from environment + quarter = os.getenv('QUARTER') + if not quarter: + print("ERROR: QUARTER environment variable not set") + 
print("Usage: QUARTER=2025-Q1 GALAXY_API_KEY=xxx python collect_galaxy_stats.py") + sys.exit(1) + + # Get API key from environment + api_key = os.getenv('GALAXY_API_KEY') + if not api_key: + print("WARNING: GALAXY_API_KEY not set, API requests may be rate-limited") + + # Output directory + output_dir = ROOT_DIR / "data" / quarter + output_dir.mkdir(parents=True, exist_ok=True) + + # Collect legacy roles + legacy_roles = collect_legacy_roles(api_key) + legacy_csv = output_dir / "galaxy_legacy.csv" + write_csv(legacy_csv, legacy_roles, ['name', 'download_count']) + + # Collect collections + collections = collect_collections() + collections_csv = output_dir / "galaxy_collections.csv" + write_csv(collections_csv, collections, ['namespace', 'name', 'full_name', 'download_count']) + + print(f"\n✅ Galaxy statistics collection complete for {quarter}") + print(f" Legacy roles: {legacy_csv}") + print(f" Collections: {collections_csv}") + + +if __name__ == '__main__': + main() diff --git a/scripts/collect_github_stats.sh b/scripts/collect_github_stats.sh new file mode 100755 index 0000000..75e3476 --- /dev/null +++ b/scripts/collect_github_stats.sh @@ -0,0 +1,206 @@ +#!/bin/bash + +# collect stats about PRs and issues in the given time frame +# by default, look for "external contributors" not role maintainers + +set -euo pipefail + +# Retry a command with exponential backoff on network errors +retry_command() { + local max_attempts=3 + local attempt=1 + local delay=5 + local exit_code=0 + + while [ $attempt -le $max_attempts ]; do + if "$@"; then + return 0 + else + exit_code=$? + + # Check if we should retry (network/timeout errors) + if [ $attempt -lt $max_attempts ]; then + echo " Attempt $attempt failed, retrying in ${delay}s..." 
>&2 + sleep $delay + delay=$((delay * 2)) + fi + + attempt=$((attempt + 1)) + fi + done + + echo " Command failed after $max_attempts attempts" >&2 + return $exit_code +} + +# "cache" of user lookups +# assumes that if someone is a collaborator on any system roles repo, +# that person is a collaborator on all system roles repos +# this is generally the case for PRs +declare -A USERS +# how many PRs were created for the given role +declare -A PRS_CREATED +# how many PRs were merged +declare -A PRS_MERGED +# how many PRs are open +declare -A PRS_OPEN +# how many PRs were created by non-maintainers +declare -A PRS_CREATED_NON_MAINT +# how many PRs were merged from non-maintainers +declare -A PRS_MERGED_NON_MAINT +# how many PRs are open from non-maintainers +declare -A PRS_OPEN_NON_MAINT +# how many issues were created for the given role +declare -A ISSUES_CREATED +# how many issues were closed +declare -A ISSUES_CLOSED +# how many issues were created by non-maintainers +declare -A ISSUES_CREATED_NON_MAINT +# how many issues were closed from non-maintainers +declare -A ISSUES_CLOSED_NON_MAINT + +# silence shellcheck about unset vars +upstream_org="${upstream_org:?upstream_org is unset}" +repo="${repo:?repo is unset}" +if [ -z "${DATE_RANGE:-}" ]; then + echo ERROR: Please specify DATE_RANGE like this + echo DATE_RANGE=2024-01-01..2024-06-30 + exit 1 +fi + +# is the given user a role repo maintainer +user_is_maintainer() { + local username + username="$1" + if [ -z "${USERS[$username]:-}" ]; then + if retry_command gh api --silent \ + "/repos/$upstream_org/$repo/collaborators/$username" 2> /dev/null; then + USERS["$username"]=0 + else + USERS["$username"]=1 + fi + fi + return "${USERS[$username]}" +} + +# get PRs with retry +get_prs() { + retry_command gh pr list -R "$upstream_org/$repo" \ + -S "created:$DATE_RANGE" \ + --state all \ + --json number,author,state,title \ + --jq '.[] | "\(.number) \(.author.login) \(.author.is_bot) \(.state) \(.title)"' +} + +# get 
issues created in date range +# prints one line per issue: number, author login, is_bot, state +# --limit is explicit because gh issue list returns at most 30 results by default +get_issues_created() { + retry_command gh issue list -R "$upstream_org/$repo" \ + -S "created:$DATE_RANGE" \ + --state all \ + --limit 1000 \ + --json number,author,state \ + --jq '.[] | "\(.number) \(.author.login) \(.author.is_bot) \(.state)"' +} + +# get issues closed in date range +# prints one line per issue: number, author login, is_bot +# --limit is explicit because gh issue list returns at most 30 results by default +get_issues_closed() { + retry_command gh issue list -R "$upstream_org/$repo" \ + -S "closed:$DATE_RANGE" \ + --state closed \ + --limit 1000 \ + --json number,author \ + --jq '.[] | "\(.number) \(.author.login) \(.author.is_bot)"' +} + +get_prs > prs.txt +while read -r number author is_bot state title; do + # exclude bot PRs completely + if [[ "$is_bot" == "true" ]]; then + continue + fi + # exclude changelog, ci related, and automated citest_skip prs + if [[ "$title" =~ ^ci: ]]; then + continue + fi + if [[ "$title" =~ ^docs\(changelog\) ]]; then + continue + fi + if [[ "$title" =~ \[citest_skip\] ]]; then + continue + fi + PRS_CREATED["$repo"]=$(("${PRS_CREATED[$repo]:-0}" + 1)) + # see if author is a maintainer + if ! user_is_maintainer "$author"; then + PRS_CREATED_NON_MAINT["$repo"]=$(("${PRS_CREATED_NON_MAINT[$repo]:-0}" + 1)) + fi + case "$state" in + MERGED) PRS_MERGED["$repo"]=$(("${PRS_MERGED[$repo]:-0}" + 1)) + if ! user_is_maintainer "$author"; then + PRS_MERGED_NON_MAINT["$repo"]=$(("${PRS_MERGED_NON_MAINT[$repo]:-0}" + 1)) + fi ;; + CLOSED) : ;; # PR closed without merging - no action needed + OPEN) PRS_OPEN["$repo"]=$(("${PRS_OPEN[$repo]:-0}" + 1)) + if !
user_is_maintainer "$author"; then + PRS_OPEN_NON_MAINT["$repo"]=$(("${PRS_OPEN_NON_MAINT[$repo]:-0}" + 1)) + fi ;; + *) : ;; # Unknown state - ignore + esac +done < prs.txt +rm -f prs.txt + +# Count issues created in the date range +get_issues_created > issues_created.txt +# shellcheck disable=SC2034 +while read -r number author is_bot state; do + # exclude bot issues completely + if [[ "$is_bot" == "true" ]]; then + continue + fi + ISSUES_CREATED["$repo"]=$(("${ISSUES_CREATED[$repo]:-0}" + 1)) + # see if author is a maintainer + if ! user_is_maintainer "$author"; then + ISSUES_CREATED_NON_MAINT["$repo"]=$(("${ISSUES_CREATED_NON_MAINT[$repo]:-0}" + 1)) + fi +done < issues_created.txt +rm -f issues_created.txt + +# Count issues closed in the date range (separate query to catch older issues) +get_issues_closed > issues_closed.txt +# shellcheck disable=SC2034 +while read -r number author is_bot; do + # exclude bot issues completely + if [[ "$is_bot" == "true" ]]; then + continue + fi + ISSUES_CLOSED["$repo"]=$(("${ISSUES_CLOSED[$repo]:-0}" + 1)) + # see if author is a maintainer + if ! user_is_maintainer "$author"; then + ISSUES_CLOSED_NON_MAINT["$repo"]=$(("${ISSUES_CLOSED_NON_MAINT[$repo]:-0}" + 1)) + fi +done < issues_closed.txt +rm -f issues_closed.txt + +if [ -n "${PRS_CSVFILE:-}" ]; then + if [ ! 
-s "${PRS_CSVFILE}" ]; then + echo Role,PRs Created,PRs Merged,PRs open,Created non-maint,Merged non-maint,Open non-maint > "$PRS_CSVFILE" + fi + echo "$repo,${PRS_CREATED[$repo]:-0},${PRS_MERGED[$repo]:-0},${PRS_OPEN[$repo]:-0},${PRS_CREATED_NON_MAINT[$repo]:-0},${PRS_MERGED_NON_MAINT[$repo]:-0},${PRS_OPEN_NON_MAINT[$repo]:-0}" >> "$PRS_CSVFILE" +else + echo In the range "$DATE_RANGE" in "$upstream_org/$repo": + echo PRs created: "${PRS_CREATED[$repo]:-0}" + echo PRs merged: "${PRS_MERGED[$repo]:-0}" + echo PRs open: "${PRS_OPEN[$repo]:-0}" + echo PRs created by non-maintainers: "${PRS_CREATED_NON_MAINT[$repo]:-0}" + echo PRs merged from non-maintainers: "${PRS_MERGED_NON_MAINT[$repo]:-0}" + echo PRs open from non-maintainers: "${PRS_OPEN_NON_MAINT[$repo]:-0}" +fi +if [ -n "${ISSUES_CSVFILE:-}" ]; then + if [ ! -s "${ISSUES_CSVFILE}" ]; then + echo Role,Issues Created,Issues Closed,Created non-maint,Closed non-maint > "$ISSUES_CSVFILE" + fi + echo "$repo,${ISSUES_CREATED[$repo]:-0},${ISSUES_CLOSED[$repo]:-0},${ISSUES_CREATED_NON_MAINT[$repo]:-0},${ISSUES_CLOSED_NON_MAINT[$repo]:-0}" >> "$ISSUES_CSVFILE" +else + echo In the range "$DATE_RANGE" in "$upstream_org/$repo": + echo Issues created: "${ISSUES_CREATED[$repo]:-0}" + echo Issues closed: "${ISSUES_CLOSED[$repo]:-0}" + echo Issues created by non-maintainers: "${ISSUES_CREATED_NON_MAINT[$repo]:-0}" + echo Issues closed from non-maintainers: "${ISSUES_CLOSED_NON_MAINT[$repo]:-0}" +fi diff --git a/scripts/generate_graphs.py b/scripts/generate_graphs.py new file mode 100755 index 0000000..9aba3ae --- /dev/null +++ b/scripts/generate_graphs.py @@ -0,0 +1,486 @@ +#!/usr/bin/env python3 +""" +Generate graphs from quarterly metrics data. + +Graphs generated: +1. Pull Requests Stats per Quarter (6 metrics) - all historical data +2. Issues Stats per Quarter (4 metrics) - all historical data +3. Legacy Roles Downloads per Role (current quarter, cumulative totals) +4. 
Legacy Roles Quarterly Delta per Role (downloads gained during current quarter) +5. Total Legacy Roles Downloads (cumulative) - all historical data +6. Total Legacy Roles Quarterly Delta (new downloads per quarter) - all historical data +7. fedora.linux_system_roles Downloads - all historical data +8. microsoft.sql Downloads - all historical data +""" + +import os +import sys +from pathlib import Path +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np + +SCRIPT_DIR = Path(__file__).parent +ROOT_DIR = SCRIPT_DIR.parent + +# Graph configuration +GRAPH_STYLE = 'seaborn-v0_8' +GRAPH_DPI = 300 +GRAPH_FIGSIZE = (12, 6) +SHOW_GRID = True +SHOW_TRENDS = True + +# Color scheme +COLORS = { + 'maintainer': '#2E86AB', # Blue + 'external': '#A23B72', # Purple + 'merged': '#06A77D', # Green + 'open': '#F18F01', # Orange + 'closed': '#C73E1D' # Red +} + + +def setup_plot_style(): + """Set up matplotlib style""" + try: + plt.style.use(GRAPH_STYLE) + except OSError as e: + print(f"Warning: Style '{GRAPH_STYLE}' not found ({e}), using default") + plt.style.use('default') + + +def save_graph(output_filename): + """Helper to save graph with consistent settings""" + output_path = ROOT_DIR / "reports" / "images" / output_filename + output_path.parent.mkdir(parents=True, exist_ok=True) + plt.tight_layout() + plt.savefig(output_path, dpi=GRAPH_DPI, bbox_inches='tight') + plt.close() + print(f" Saved: {output_path}") + + +def generate_github_prs_graph(quarter): + """Generate GitHub PRs stats per quarter (all 6 metrics) - shows ALL historical data""" + print("Generating GitHub PRs graph...") + + summary_file = ROOT_DIR / "data" / "github_prs_summary.csv" + if not summary_file.exists(): + print(" No github_prs_summary.csv found") + return + + df = pd.read_csv(summary_file).sort_values('Quarter') + # Show ALL data, not just last N quarters + print(f" Showing {len(df)} quarters of data") + + figsize = GRAPH_FIGSIZE + fig, ax = plt.subplots(figsize=figsize) + + x = 
def generate_github_issues_graph(quarter):
    """Plot the four GitHub issue metrics for every quarter in the summary CSV.

    Reads data/github_issues_summary.csv under ROOT_DIR, draws one line per
    metric over all recorded quarters and stores the result as
    "github-issues.png" through save_graph(). When the summary file is
    missing a notice is printed and nothing is drawn.

    The ``quarter`` argument is not consulted by this function: the chart
    always spans the full history.
    """
    print("Generating GitHub Issues graph...")

    csv_path = ROOT_DIR / "data" / "github_issues_summary.csv"
    if not csv_path.exists():
        print(" No github_issues_summary.csv found")
        return

    history = pd.read_csv(csv_path).sort_values('Quarter')
    # Every quarter in the file is shown, not just the most recent ones
    print(f" Showing {len(history)} quarters of data")

    fig, ax = plt.subplots(figsize=GRAPH_FIGSIZE)

    positions = np.arange(len(history))
    quarter_labels = history['Quarter'].values

    # (CSV column, legend label, marker, marker size, palette key, extra kwargs)
    # Totals are solid lines; the external-contributor series are dashed.
    series_specs = (
        ('Issues Created', 'Issues Created', 'o', 8, 'maintainer', {}),
        ('Issues Closed', 'Issues Closed', 's', 8, 'merged', {}),
        ('External Issues Created', 'External Created', 'D', 6, 'external',
         {'linestyle': '--'}),
        ('External Issues Closed', 'External Closed', 'v', 6, 'closed',
         {'linestyle': '--'}),
    )
    for column, legend_label, marker, marker_size, palette_key, extra in series_specs:
        ax.plot(positions, history[column], marker=marker, linewidth=2,
                markersize=marker_size, label=legend_label,
                color=COLORS[palette_key], **extra)

    axis_font = {'fontsize': 12, 'fontweight': 'bold'}
    ax.set_xlabel('Quarter', **axis_font)
    ax.set_ylabel('Number of Issues', **axis_font)
    ax.set_title('GitHub Issues Statistics per Quarter', fontsize=14, fontweight='bold')
    ax.set_xticks(positions)
    ax.set_xticklabels(quarter_labels, rotation=45, ha='right')
    ax.legend(loc='best', fontsize=10)

    if SHOW_GRID:
        ax.grid(alpha=0.3)

    plt.tight_layout()

    save_graph("github-issues.png")
def generate_legacy_roles_quarterly_delta_graph(quarter):
    """Generate the per-role download delta chart for one quarter.

    Reads data/galaxy_legacy_per_role_history.csv (one row per role in the
    'Role' column, one column per quarter), subtracts the immediately
    preceding quarter's value from ``quarter``'s value for every role, and
    draws a horizontal bar chart sorted from smallest to largest delta.
    The figure is saved as "galaxy-legacy-per-role-delta-<quarter>.png".

    Args:
        quarter: Column name of the quarter to chart (e.g. "2025-Q1").

    Returns:
        None. A message is printed and nothing is drawn when the history
        file is missing, when ``quarter`` is not a usable history column,
        or when there is no earlier quarter to compare against.

    Notes:
        Missing or non-numeric cells are treated as 0, so a role without a
        value in the previous quarter shows its whole current count as the
        delta, and a role without a current value shows a negative delta.
    """
    print("Generating Galaxy legacy roles quarterly delta graph...")

    history_file = ROOT_DIR / "data" / "galaxy_legacy_per_role_history.csv"
    if not history_file.exists():
        print(" No per-role history file found - skipping delta graph")
        return

    history_df = pd.read_csv(history_file)

    if quarter not in history_df.columns:
        print(f" Quarter {quarter} not found in history - skipping delta graph")
        return

    # Quarter columns are the ones that look like years ("20xx-Qn"); sorting
    # the names lexicographically puts them in chronological order.
    quarters = sorted(
        col for col in history_df.columns
        if col != 'Role' and col.startswith('20')
    )

    # The column exists but is not recognised as a quarter column
    if quarter not in quarters:
        print(f" Quarter {quarter} not in history columns - skipping delta graph")
        return

    current_idx = quarters.index(quarter)
    if current_idx == 0:
        print(" No previous quarter available - skipping delta graph")
        return

    prev_quarter = quarters[current_idx - 1]
    print(f" Comparing {quarter} to {prev_quarter}")

    # Both columns come from the same frame and are already row-aligned, so
    # the delta is computed directly. A merge on the role name would add
    # nothing and would multiply rows if a role name ever appeared twice.
    current = pd.to_numeric(history_df[quarter], errors='coerce').fillna(0)
    previous = pd.to_numeric(history_df[prev_quarter], errors='coerce').fillna(0)

    deltas = pd.DataFrame({
        'name': history_df['Role'],
        'delta': (current - previous).astype(int),
    })
    # Stable sort keeps roles with equal deltas in file order between runs
    deltas = deltas.sort_values('delta', ascending=True, kind='stable')

    fig, ax = plt.subplots(figsize=(10, 12))

    y_pos = np.arange(len(deltas))
    delta_values = deltas['delta'].values

    # Growth is drawn in green, shrinkage in red
    bar_colors = [
        COLORS['merged'] if val >= 0 else COLORS['closed']
        for val in delta_values
    ]
    ax.barh(y_pos, delta_values, color=bar_colors)

    ax.set_yticks(y_pos)
    ax.set_yticklabels(deltas['name'].values, fontsize=9)
    ax.set_xlabel('Downloads This Quarter (Delta)', fontsize=12, fontweight='bold')
    ax.set_title(f'Galaxy Legacy Roles - Quarterly Growth ({quarter})',
                 fontsize=14, fontweight='bold')

    if SHOW_GRID:
        ax.grid(axis='x', alpha=0.3)

    # Value labels sit just outside the end of each bar. A negative bar ends
    # on its left side, so its label is right-aligned there instead of being
    # drawn on top of the bar.
    for i, val in enumerate(delta_values):
        if val >= 0:
            ax.text(val, i, f' {int(val):+,}', va='center', ha='left', fontsize=8)
        else:
            ax.text(val, i, f'{int(val):+,} ', va='center', ha='right', fontsize=8)

    plt.tight_layout()

    save_graph(f"galaxy-legacy-per-role-delta-{quarter}.png")
def generate_legacy_quarterly_delta_total_graph(quarter):
    """Chart quarter-over-quarter growth of total Galaxy legacy downloads.

    Reads data/galaxy_legacy_summary.csv, takes the difference between
    consecutive 'Total Downloads' values (the first quarter has no
    predecessor and is dropped) and draws one bar per remaining quarter.
    The figure is saved as "galaxy-legacy-total-delta.png".

    Nothing is drawn when the summary file is missing or holds fewer than
    two quarters. The ``quarter`` argument is not consulted by this
    function; all recorded quarters are shown.
    """
    print("Generating Galaxy legacy quarterly delta total graph...")

    csv_path = ROOT_DIR / "data" / "galaxy_legacy_summary.csv"
    if not csv_path.exists():
        print(" No galaxy_legacy_summary.csv found")
        return

    totals = pd.read_csv(csv_path).sort_values('Quarter')
    if len(totals) < 2:
        print(" Need at least 2 quarters to calculate delta")
        return

    # New downloads per quarter = this quarter's total minus the previous one
    totals['Delta'] = totals['Total Downloads'].diff()

    # Row 0 has nothing to be compared with, so it carries no delta
    growth = totals.iloc[1:].copy()
    print(f" Showing {len(growth)} quarters of delta data")

    fig, ax = plt.subplots(figsize=GRAPH_FIGSIZE)

    quarter_labels = growth['Quarter'].values
    gained = growth['Delta'].values
    positions = np.arange(len(growth))

    ax.bar(positions, gained, color=COLORS['merged'], alpha=0.8,
           label='Quarterly Growth')

    axis_font = {'fontsize': 12, 'fontweight': 'bold'}
    ax.set_xlabel('Quarter', **axis_font)
    ax.set_ylabel('New Downloads (Quarterly)', **axis_font)
    ax.set_title('Galaxy Legacy Roles - Quarterly Download Growth',
                 fontsize=14, fontweight='bold')
    ax.set_xticks(positions)
    ax.set_xticklabels(quarter_labels, rotation=45, ha='right')
    ax.legend(loc='best')

    if SHOW_GRID:
        ax.grid(axis='y', alpha=0.3)

    # Print the numeric value at the tip of each bar
    for position, gain in enumerate(gained):
        ax.text(position, gain, f'{gain:,.0f}', ha='center', va='bottom',
                fontweight='bold', fontsize=9)

    plt.tight_layout()

    save_graph("galaxy-legacy-total-delta.png")
def main():
    """Entry point: render every report graph for the quarter in $QUARTER.

    Exits with status 1 when the QUARTER environment variable is unset or
    empty. Otherwise applies the plot style, makes sure reports/images
    exists and runs each graph generator in a fixed order.
    """
    quarter = os.environ.get('QUARTER')
    if not quarter:
        print("ERROR: QUARTER environment variable not set")
        print("Usage: QUARTER=2025-Q1 python generate_graphs.py")
        sys.exit(1)

    setup_plot_style()

    # All images land in one directory; create it up front
    images_dir = ROOT_DIR / "reports" / "images"
    images_dir.mkdir(parents=True, exist_ok=True)

    separator = "=" * 60
    print(f"Generating graphs for {quarter}...")
    print(separator)

    # Generators that only need the quarter, in the order they are rendered
    quarter_only_generators = (
        generate_github_prs_graph,
        generate_github_issues_graph,
        generate_legacy_roles_per_role_graph,
        generate_legacy_roles_quarterly_delta_graph,
        generate_legacy_total_graph,
        generate_legacy_quarterly_delta_total_graph,
    )
    for render in quarter_only_generators:
        render(quarter)

    # One download graph per tracked collection
    for collection in ('fedora.linux_system_roles', 'microsoft.sql'):
        generate_collection_graph(quarter, collection)

    print(separator)
    print(f"✅ Graph generation complete for {quarter}")
    print(f" Output directory: {images_dir}")
def update_summary_file(filepath, quarter_data, key_column='Quarter'):
    """Insert or replace one row of a summary CSV and write the file back.

    Args:
        filepath: Path of the summary CSV; it is created when absent.
        quarter_data: Mapping of column name to value for the row to store.
            It must contain ``key_column``.
        key_column: Column that identifies a row (defaults to 'Quarter').

    Returns:
        The DataFrame that was written. When the file already existed its
        rows are sorted by ``key_column``.
    """
    key_value = quarter_data[key_column]
    incoming_row = pd.DataFrame([quarter_data])

    if not filepath.exists():
        # First write: the file consists of just this row
        summary_df = incoming_row
        print(f" Created new file with {key_value}")
    else:
        existing = pd.read_csv(filepath)
        replacing = key_value in existing[key_column].values

        if replacing:
            # Drop the stale row so the key stays unique after the append
            existing = existing[existing[key_column] != key_value]

        summary_df = pd.concat([existing, incoming_row], ignore_index=True)

        if replacing:
            print(f" Updated existing entry for {key_value}")
        else:
            print(f" Added new entry for {key_value}")

        # Keep rows in key order
        summary_df = summary_df.sort_values(key_column)

    summary_df.to_csv(filepath, index=False)

    return summary_df
def aggregate_galaxy_collections(quarter):
    """
    Aggregate Galaxy Collections data and calculate quarterly deltas.

    data/<quarter>/galaxy_collections.csv holds cumulative download totals,
    but the summary stores new downloads per quarter. A separate tracking
    file, data/galaxy_collections_cumulative.csv, keeps the cumulative
    totals per quarter so that deltas can be derived from it.

    Args:
        quarter: Quarter label such as "2025-Q1". Labels must sort
            chronologically as plain strings.

    Returns:
        A dict with 'Quarter', one delta per tracked collection and
        'Total Downloads' (the sum of the deltas), or None when the
        quarter's galaxy_collections.csv does not exist.

    Side effects:
        Inserts or replaces the row for ``quarter`` in the cumulative
        tracking file and rewrites it sorted by quarter.

    Notes:
        The baseline is the latest tracked quarter strictly before
        ``quarter``, so re-running an older quarter is not compared with
        data recorded for later quarters. When no earlier quarter is
        tracked the baseline is 0 and the delta equals the cumulative
        total.
    """
    tracked_collections = ('fedora.linux_system_roles', 'microsoft.sql')

    data_dir = ROOT_DIR / "data" / quarter
    collections_file = data_dir / "galaxy_collections.csv"

    if not collections_file.exists():
        print(f" WARNING: {collections_file} not found")
        return None

    collections_df = pd.read_csv(collections_file)

    # Current cumulative totals keyed by collection name
    current_cumulative = {
        name: int(downloads)
        for name, downloads in zip(collections_df['full_name'],
                                   collections_df['download_count'])
    }
    current_totals = {
        name: current_cumulative.get(name, 0) for name in tracked_collections
    }

    cumulative_file = ROOT_DIR / "data" / "galaxy_collections_cumulative.csv"
    previous_cumulative = {}
    other_quarters = None

    if cumulative_file.exists():
        cumulative_df = pd.read_csv(cumulative_file)

        # Only quarters before the requested one can serve as the baseline
        earlier = cumulative_df[cumulative_df['Quarter'] < quarter].sort_values('Quarter')
        if len(earlier) > 0:
            last_row = earlier.iloc[-1]
            for name in tracked_collections:
                value = last_row.get(name, 0)
                # A blank cell is read as NaN, which int() cannot convert
                previous_cumulative[name] = 0 if pd.isna(value) else int(value)
            print(f" Previous quarter: {last_row['Quarter']}")
            print(f" fedora cumulative: {previous_cumulative['fedora.linux_system_roles']:,}")
            print(f" microsoft cumulative: {previous_cumulative['microsoft.sql']:,}")

        # Every row except the one being (re)written
        other_quarters = cumulative_df[cumulative_df['Quarter'] != quarter]

    # Quarterly delta = current cumulative total minus the baseline
    deltas = {
        name: current_totals[name] - previous_cumulative.get(name, 0)
        for name in tracked_collections
    }
    total_delta = sum(deltas.values())

    # Insert or replace this quarter's row in the tracking file
    new_cumulative_row = pd.DataFrame([{'Quarter': quarter, **current_totals}])
    if other_quarters is None:
        cumulative_df = new_cumulative_row
    else:
        cumulative_df = pd.concat([other_quarters, new_cumulative_row],
                                  ignore_index=True)

    cumulative_df = cumulative_df.sort_values('Quarter')
    cumulative_df.to_csv(cumulative_file, index=False)
    print(f" Updated cumulative tracking: {cumulative_file}")

    fedora_delta = deltas['fedora.linux_system_roles']
    microsoft_delta = deltas['microsoft.sql']

    summary = {
        'Quarter': quarter,
        'fedora.linux_system_roles': fedora_delta,
        'microsoft.sql': microsoft_delta,
        'Total Downloads': total_delta
    }

    print(f"\n fedora.linux_system_roles: {fedora_delta:,} (quarterly new downloads)")
    print(f" microsoft.sql: {microsoft_delta:,} (quarterly new downloads)")
    print(f" Total: {total_delta:,}")

    return summary
def main():
    """Entry point: refresh every summary CSV for the quarter in $QUARTER.

    Exits with status 1 when the QUARTER environment variable is unset or
    empty. Otherwise each aggregator runs in turn, and its result is written
    to the matching summary file under data/ unless the aggregator returned
    nothing. The per-role Galaxy history is updated last.
    """
    quarter = os.environ.get('QUARTER')
    if not quarter:
        print("ERROR: QUARTER environment variable not set")
        print("Usage: QUARTER=2024-Q4 python update_quarterly_summary.py")
        sys.exit(1)

    separator = "=" * 60
    print(f"Updating quarterly summaries for {quarter}...")
    print(separator)

    data_root = ROOT_DIR / "data"

    # (heading to print, aggregator, summary file the result is stored in)
    summary_steps = (
        ("\n📊 GitHub PRs:", aggregate_github_prs, "github_prs_summary.csv"),
        ("\n🐛 GitHub Issues:", aggregate_github_issues, "github_issues_summary.csv"),
        ("\n📦 Galaxy Legacy Roles:", aggregate_galaxy_legacy, "galaxy_legacy_summary.csv"),
        ("\n📚 Galaxy Collections:", aggregate_galaxy_collections,
         "galaxy_collections_summary.csv"),
    )
    for heading, aggregate, summary_name in summary_steps:
        print(heading)
        summary = aggregate(quarter)
        # An aggregator hands back None when its input CSV is missing
        if summary:
            update_summary_file(data_root / summary_name, summary)

    # The per-role history writes its own file and returns nothing
    print("\n📜 Galaxy Per-Role History:")
    update_galaxy_per_role_history(quarter)

    print("\n" + separator)
    print(f"✅ All quarterly summaries updated for {quarter}")