Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ jobs:
run: make setup-action-validator
- name: Validate composite actions
run: make lint-actions
check-nav-catalog:
name: Validate nav/catalog consistency
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- name: Check nav matches catalog
run: python3 scripts/check-nav-catalog.py
make-compile:
name: Verify compiled workflows
runs-on: ubuntu-latest
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ lint-actions: setup-action-validator
done

lint: lint-workflows lint-actions
@python3 scripts/check-nav-catalog.py

docs-install:
@uv sync --dev
Expand Down
5 changes: 5 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ nav:
- Mention in PR (no sandbox): workflows/gh-agent-workflows/mention-in-pr-no-sandbox.md
- Event-driven:
- Automatic Issue Triage: workflows/gh-agent-workflows/issue-triage.md
- Dependency Review: workflows/gh-agent-workflows/dependency-review.md
- PR Review: workflows/gh-agent-workflows/pr-review.md
- Duplicate Issue Detector: workflows/gh-agent-workflows/duplicate-issue-detector.md
- Update PR Body: workflows/gh-agent-workflows/update-pr-body.md
Expand All @@ -54,10 +55,14 @@ nav:
- Text Quality: workflows/gh-agent-workflows/text-quality.md
- Standalone Scheduled:
- Agent Suggestions: workflows/gh-agent-workflows/agent-suggestions.md
- Autonomy Atomicity Analyzer: workflows/gh-agent-workflows/autonomy-atomicity-analyzer.md
- Breaking Change Detector: workflows/gh-agent-workflows/breaking-change-detector.md
- Code Simplifier: workflows/gh-agent-workflows/code-simplifier.md
- Flaky Test Investigator: workflows/gh-agent-workflows/flaky-test-investigator.md
- Framework Best Practices: workflows/gh-agent-workflows/framework-best-practices.md
- Information Architecture: workflows/gh-agent-workflows/information-architecture.md
- Performance Profiler: workflows/gh-agent-workflows/performance-profiler.md
- Product Manager Impersonator: workflows/gh-agent-workflows/product-manager-impersonator.md
- Project Summary: workflows/gh-agent-workflows/project-summary.md
- Release Update Check: workflows/gh-agent-workflows/release-update.md
- Small Problem Fixer: workflows/gh-agent-workflows/small-problem-fixer.md
Expand Down
125 changes: 125 additions & 0 deletions scripts/check-nav-catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#!/usr/bin/env python3
"""Validate that mkdocs.yml nav entries cover all workflows listed in the catalog.

Each workflow slug referenced in the "Available workflows" catalog
(docs/workflows/gh-agent-workflows.md) must be reachable from the mkdocs.yml nav,
either:

1. As a direct nav entry (workflows/gh-agent-workflows/<slug>.md), or
2. Mentioned in the content of a static overview page that IS in the nav
(e.g. docs/workflows/gh-agent-workflows/bugs.md covers bug-hunter and
bug-exterminator via inline documentation).

Exits with a non-zero status and prints the missing slugs when drift is detected.

Elastic-specific workflows (prefix ``estc-``) live in their own nav section and
are excluded from this check.
"""

import re
import sys
from pathlib import Path

# Directory two levels above this file (the parent of the directory holding
# the script); every other path below is resolved relative to it.
REPO_ROOT = Path(__file__).parent.parent
# MkDocs configuration file whose text is scanned for nav entries.
MKDOCS_YML = REPO_ROOT / "mkdocs.yml"
# Markdown catalog page whose link targets define the expected workflow slugs.
CATALOG_MD = REPO_ROOT / "docs" / "workflows" / "gh-agent-workflows.md"
# Directory holding the per-workflow pages (``<slug>.md``) that may act as
# overview pages mentioning other workflows.
STATIC_DOCS_DIR = REPO_ROOT / "docs" / "workflows" / "gh-agent-workflows"

# Slugs starting with this prefix are filtered out before the comparison.
ELASTIC_SPECIFIC_PREFIX = "estc-"


def extract_catalog_slugs(catalog_text: str) -> set[str]:
    """Extract workflow slugs from the markdown catalog page.

    Scans for Markdown link targets of the form::

        (gh-agent-workflows/<slug>.md)
        (gh-agent-workflows/<slug>.md#anchor)
        (gh-agent-workflows/<slug>.md?query)

    where ``<slug>`` is a lowercase alphanumeric-and-hyphen string.

    Args:
        catalog_text: Full text of the catalog markdown page.

    Returns:
        The set of slugs found; empty when the text contains no such link.
    """
    # Review note [LOW]: requiring ``.md`` to be followed directly by ``)``
    # skipped links carrying an anchor or query string, so those workflows
    # never entered the catalog set and nav/catalog drift could slip through
    # CI. The optional ``[#?]...`` group accepts such suffixes while still
    # demanding the closing parenthesis of the link target.
    link_target = r"\(gh-agent-workflows/([a-z0-9-]+)\.md(?:[#?][^)\s]*)?\)"
    return set(re.findall(link_target, catalog_text))


def extract_nav_slugs(mkdocs_text: str) -> set[str]:
    """Extract workflow slugs directly referenced in the mkdocs.yml nav.

    Only the top-level ``nav:`` section is scanned. The section is taken to
    run from the ``nav:`` line up to the next line that starts in column 0
    and is neither a list item (``-``) nor a comment (``#``), i.e. the next
    top-level key. This assumes a block-style nav list, which is what the
    line-based scan can recognise without a YAML parser.

    When the text has no top-level ``nav:`` key (for example a bare fragment
    of nav entries), the whole text is scanned instead.

    Args:
        mkdocs_text: Text of the MkDocs configuration file, or a fragment.

    Returns:
        The set of slugs appearing as
        ``workflows/gh-agent-workflows/<slug>.md`` within the scanned scope.
    """
    # Review note [LOW]: scanning the entire file treated any non-nav
    # reference (e.g. ``extra.note: workflows/gh-agent-workflows/foo.md``) as
    # a nav entry, which could mark a workflow reachable when it is not in
    # the navigation tree. Restricting the scan to the nav section avoids
    # that false pass.
    nav_lines: list[str] = []
    found_nav_key = False
    inside_nav = False

    for line in mkdocs_text.splitlines():
        if re.match(r"nav\s*:", line):
            found_nav_key = True
            inside_nav = True
            # Keep the key line itself in case entries share it.
            nav_lines.append(line)
            continue
        if not inside_nav:
            continue
        stripped = line.strip()
        starts_new_top_level_key = (
            bool(stripped)
            and not line[0].isspace()
            and not stripped.startswith(("-", "#"))
        )
        if starts_new_top_level_key:
            inside_nav = False
            continue
        nav_lines.append(line)

    scope = "\n".join(nav_lines) if found_nav_key else mkdocs_text
    return set(
        re.findall(r"workflows/gh-agent-workflows/([a-z0-9-]+)\.md", scope)
    )


def extract_mentioned_slugs(page_content: str) -> set[str]:
    """Collect every workflow slug that a docs page links to or references.

    Two kinds of reference are recognised:

    * Page links of the form ``gh-agent-workflows/<slug>.md``
      (e.g. ``[Bug Hunter](gh-agent-workflows/bug-hunter.md)``)
    * Directory paths of the form ``gh-agent-workflows/<slug>/``
      (e.g. ``.../gh-agent-workflows/bug-hunter/example.yml`` in an install
      snippet)

    Returns the union of the slugs found by both patterns.
    """
    # The patterns are applied separately on purpose: each one is scanned for
    # non-overlapping matches independently of the other.
    patterns = (
        r"\bgh-agent-workflows/([a-z0-9-]+)\.md\b",
        r"\bgh-agent-workflows/([a-z0-9-]+)/",
    )
    found: set[str] = set()
    for pattern in patterns:
        found.update(re.findall(pattern, page_content))
    return found


def covered_slugs(nav_slugs: set[str]) -> set[str]:
    """Compute every workflow slug that can be reached from the nav.

    The result contains the direct nav slugs themselves plus any slug
    mentioned inside a nav page that exists under ``STATIC_DOCS_DIR`` (such
    pages can document several workflows inline, e.g. bugs.md,
    code-duplication.md). Nav slugs without a matching file contribute only
    themselves. The input set is not modified.
    """
    candidate_pages = (STATIC_DOCS_DIR / f"{name}.md" for name in nav_slugs)
    mentioned_per_page = (
        extract_mentioned_slugs(page.read_text(encoding="utf-8"))
        for page in candidate_pages
        if page.exists()
    )
    return set(nav_slugs).union(*mentioned_per_page)


def main() -> int:
    """Run the nav/catalog consistency check and report the outcome.

    Reads the catalog page and mkdocs.yml, drops slugs that start with
    ``ELASTIC_SPECIFIC_PREFIX`` from both sides, and compares the catalog
    slugs against everything reachable from the nav.

    Returns:
        0 when every catalog slug is reachable, 1 otherwise (after printing
        the missing slugs in sorted order).
    """
    catalog_source = CATALOG_MD.read_text(encoding="utf-8")
    nav_source = MKDOCS_YML.read_text(encoding="utf-8")

    def without_elastic(slugs: set[str]) -> set[str]:
        # Elastic-specific workflows are excluded from the comparison.
        return {
            slug for slug in slugs
            if not slug.startswith(ELASTIC_SPECIFIC_PREFIX)
        }

    catalog_slugs = without_elastic(extract_catalog_slugs(catalog_source))
    nav_slugs = without_elastic(extract_nav_slugs(nav_source))

    unreachable = sorted(catalog_slugs - covered_slugs(nav_slugs))

    if not unreachable:
        print(f"OK: all {len(catalog_slugs)} catalog workflows are reachable from nav")
        return 0

    print(
        "ERROR: The following workflows are in the catalog "
        "but not reachable from mkdocs.yml nav:"
    )
    for slug in unreachable:
        print(f" - {slug}")
    return 1


# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Loading