diff --git a/.github/workflows/docusaurus-tests.yml b/.github/workflows/docusaurus-tests.yml index 2514bb4a..4f92d552 100644 --- a/.github/workflows/docusaurus-tests.yml +++ b/.github/workflows/docusaurus-tests.yml @@ -1,10 +1,6 @@ name: Docusaurus Tests on: - pull_request: - paths: - - 'docs/docusaurus/**' - - '.github/workflows/docusaurus-tests.yml' workflow_call: inputs: soft-fail: diff --git a/.github/workflows/pr-validation.yml b/.github/workflows/pr-validation.yml index bfee9d07..a3ccc2cb 100644 --- a/.github/workflows/pr-validation.yml +++ b/.github/workflows/pr-validation.yml @@ -127,6 +127,15 @@ jobs: contents: read id-token: write + # Docusaurus documentation site tests and build verification + docusaurus-tests: + name: Docusaurus Tests + uses: ./.github/workflows/docusaurus-tests.yml + with: + soft-fail: false + permissions: + contents: read + # Python pytest test execution pytest-tests: name: Pytest Tests diff --git a/docs/README.md b/docs/README.md index 0693aaa2..4864258f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -46,8 +46,6 @@ Standalone guides available now. These cover common tasks and will move into the | Guide | Description | |------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------| -| [AzureML Validation Job Debugging](operations/azureml-validation-job-debugging.md) | Diagnosing and resolving AzureML validation job failures on AKS, including pod scheduling and resource quota issues | -| [LeRobot Inference](inference/lerobot-inference.md) | Running LeRobot inference workloads with pre-trained policies on Azure infrastructure | | [MLflow Integration](training/mlflow-integration.md) | Configuring MLflow experiment tracking for SKRL training agents with automatic metric logging to Azure ML | | [Security Guide](operations/security-guide.md) | Security configuration inventory, deployment responsibilities, and hardening checklist for robotics workloads | diff --git a/docs/contributing/README.md b/docs/contributing/README.md index 52d77884..45eef877 100644 --- a/docs/contributing/README.md +++ b/docs/contributing/README.md @@ -84,7 +84,7 @@ Search existing resources before asking questions: * Search [GitHub Issues](https://github.com/microsoft/physical-ai-toolchain/issues) for similar questions or problems * Check [GitHub Discussions](https://github.com/microsoft/physical-ai-toolchain/discussions) for community Q&A * Review [docs/](https://github.com/microsoft/physical-ai-toolchain/tree/main/docs) for troubleshooting guides -* See [azureml-validation-job-debugging.md](../operations/azureml-validation-job-debugging.md) for common deployment and workflow issues +* See [azureml-validation-job-debugging.md](../evaluation/azureml-validation-job-debugging.md) for common deployment and workflow issues If you cannot find an answer: diff --git a/docs/contributing/contribution-workflow.md b/docs/contributing/contribution-workflow.md index f2c58874..1849fd1c 100644 --- a/docs/contributing/contribution-workflow.md +++ b/docs/contributing/contribution-workflow.md @@ -190,7 +190,7 @@ Documentation contributions improve the architecture for the entire robotics and ### High-Value Documentation Contributions -* Deployment troubleshooting guides: Expand [azureml-validation-job-debugging.md](../operations/azureml-validation-job-debugging.md) with new scenarios +* Deployment troubleshooting guides: Expand [azureml-validation-job-debugging.md](../evaluation/azureml-validation-job-debugging.md) with new scenarios * Region/SKU compatibility matrices: Document tested combinations and known limitations * Cost optimization strategies: Real-world cost profiles and reduction techniques * Network architecture decisions: Guidance on when to use private vs. hybrid vs. public modes diff --git a/docs/data-pipeline/chunking-compression-config.md b/docs/data-pipeline/chunking-compression-config.md index 564ae9fb..6f72f613 100644 --- a/docs/data-pipeline/chunking-compression-config.md +++ b/docs/data-pipeline/chunking-compression-config.md @@ -356,7 +356,6 @@ Post-processing or LeRobot conversion fails with codec errors (`zstd: error` in ## Related Documentation -- [LeRobot Inference](../inference/lerobot-inference.md) for policy deployment after training on recorded data - [Security Guide](../operations/security-guide.md) for data encryption of recorded bags at rest ## Sources diff --git a/docs/docusaurus/docusaurus.config.js b/docs/docusaurus/docusaurus.config.js index 0407dc96..815d649e 100644 --- a/docs/docusaurus/docusaurus.config.js +++ b/docs/docusaurus/docusaurus.config.js @@ -13,12 +13,13 @@ const config = { organizationName: 'microsoft', projectName: 'physical-ai-toolchain', - onBrokenLinks: 'warn', + onBrokenLinks: 'throw', + onDuplicateRoutes: 'throw', markdown: { format: 'detect', hooks: { - onBrokenMarkdownLinks: 'warn', + onBrokenMarkdownLinks: 'throw', }, }, @@ -94,7 +95,7 @@ const config = { }, { label: 'Deploy', - to: '/deploy/', + to: '/infrastructure/', }, { label: 'Training', diff --git a/docs/docusaurus/src/data/hubCards.tsx b/docs/docusaurus/src/data/hubCards.tsx index 86e27725..bfc661ae 100644 --- a/docs/docusaurus/src/data/hubCards.tsx +++ b/docs/docusaurus/src/data/hubCards.tsx @@ -18,7 +18,7 @@ export const iconCards: IconCardProps[] = [ icon: , supertitle: 'Infrastructure', title: 'Deploy Infrastructure', - href: '/deploy/', + href: '/infrastructure/', description: 'Provision AKS clusters, networking, storage, and identity with Terraform.', }, { @@ -56,7 +56,7 @@ export const boxCards: BoxCardProps[] = [ title: 'Architecture Guide', links: [ { label: 'System architecture', href: '/contributing/architecture' }, - { label: 'Network topology', href: '/deploy/infrastructure-reference' }, + { label: 'Network topology', href: '/infrastructure/infrastructure-reference' }, { label: 'Lifecycle domains', href: '/contributing/architecture#domain-overview' }, ], icon: '/img/icons/clipboard-task.svg', @@ -74,8 +74,8 @@ export const boxCards: BoxCardProps[] = [ title: 'Security', links: [ { label: 'Security review checklist', href: '/contributing/security-review' }, - { label: 'Private cluster access', href: '/deploy/vpn' }, - { label: 'Identity and credentials', href: '/deploy/infrastructure#osmo-workload-identity' }, + { label: 'Private cluster access', href: '/infrastructure/vpn' }, + { label: 'Identity and credentials', href: '/infrastructure/infrastructure-deployment#osmo-workload-identity' }, ], icon: '/img/icons/shield-lock.svg', }, diff --git a/docs/docusaurus/static/.nojekyll b/docs/docusaurus/static/.nojekyll new file mode 100644 index 00000000..e69de29b diff --git a/docs/fleet-deployment/README.md b/docs/fleet-deployment/README.md index f8ddd8ef..4ae07332 100644 --- a/docs/fleet-deployment/README.md +++ b/docs/fleet-deployment/README.md @@ -4,5 +4,5 @@ Deploy trained robot policies to edge fleets via FluxCD GitOps pipelines, image ## Topics -- [GitOps architecture](../../fleet-deployment/specifications/gitops.specification.md) -- [Deployment gating](../../fleet-deployment/specifications/gating-service.specification.md) +- [GitOps architecture](https://github.com/microsoft/physical-ai-toolchain/blob/main/fleet-deployment/specifications/gitops.specification.md) +- [Deployment gating](https://github.com/microsoft/physical-ai-toolchain/blob/main/fleet-deployment/specifications/gating-service.specification.md) diff --git a/docs/fleet-intelligence/README.md b/docs/fleet-intelligence/README.md index 061fde55..bd998a42 100644 --- a/docs/fleet-intelligence/README.md +++ b/docs/fleet-intelligence/README.md @@ -27,7 +27,7 @@ Fleet-wide telemetry collection, operational dashboards, drift detection, and au | Guide | Description | |-----------------------------------------------------------------------------------------------------------|---------------------------------------| -| [Telemetry Specification](../../fleet-intelligence/specifications/telemetry.specification.md) | Schema and routing architecture | -| [Dashboard Specification](../../fleet-intelligence/specifications/dashboards.specification.md) | Fleet dashboard and alerting design | -| [Drift Detection Specification](../../fleet-intelligence/specifications/drift-detection.specification.md) | Detection algorithms and thresholds | -| [Retraining Specification](../../fleet-intelligence/specifications/retraining.specification.md) | Automated retraining trigger pipeline | +| [Telemetry Specification](https://github.com/microsoft/physical-ai-toolchain/blob/main/fleet-intelligence/specifications/telemetry.specification.md) | Schema and routing architecture | +| [Dashboard Specification](https://github.com/microsoft/physical-ai-toolchain/blob/main/fleet-intelligence/specifications/dashboards.specification.md) | Fleet dashboard and alerting design | +| [Drift Detection Specification](https://github.com/microsoft/physical-ai-toolchain/blob/main/fleet-intelligence/specifications/drift-detection.specification.md) | Detection algorithms and thresholds | +| [Retraining Specification](https://github.com/microsoft/physical-ai-toolchain/blob/main/fleet-intelligence/specifications/retraining.specification.md) | Automated retraining trigger pipeline | diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index b11b64e8..a5e887a0 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -179,7 +179,6 @@ See [Cost Considerations](../contributing/cost-considerations.md) for detailed p | Resource | Description | |---------------------------------------------------------------------------------------------------|-------------------------------------------| -| [LeRobot Inference](../inference/lerobot-inference.md) | Run inference with trained LeRobot models | | [MLflow Integration](../training/mlflow-integration.md) | Track experiments with MLflow | | [Deployment Guide](https://github.com/microsoft/physical-ai-toolchain/blob/main/deploy/README.md) | Full deployment reference and options | | [Contributing Guide](../contributing/README.md) | Development workflow and code standards | diff --git a/docs/infrastructure/_category_.json b/docs/infrastructure/_category_.json index 40de5711..284e5bce 100644 --- a/docs/infrastructure/_category_.json +++ b/docs/infrastructure/_category_.json @@ -1,8 +1,4 @@ { "label": "Infrastructure", - "position": 2, - "link": { - "type": "generated-index", - "description": "Deploy infrastructure and platform components." - } + "position": 2 } diff --git a/docs/infrastructure/infrastructure.md b/docs/infrastructure/infrastructure.md index 8a24ec2c..acfa1e18 100644 --- a/docs/infrastructure/infrastructure.md +++ b/docs/infrastructure/infrastructure.md @@ -1,6 +1,7 @@ --- sidebar_position: 3 title: Infrastructure Deployment +slug: infrastructure-deployment description: Terraform configuration and deployment for AKS, Azure ML, storage, and OSMO backend services author: Microsoft Robotics-AI Team ms.date: 2026-03-02 diff --git a/docs/operations/README.md b/docs/operations/README.md index e910eefb..e2b20ca0 100644 --- a/docs/operations/README.md +++ b/docs/operations/README.md @@ -23,7 +23,6 @@ Centralized hub for operational documentation covering monitoring, troubleshooti | [Troubleshooting](troubleshooting.md) | Symptom-based resolution for common deployment, GPU, and workflow errors | | [Security Guide](security-guide.md) | Security configuration inventory and deployment checklist | | [GPU Configuration](../reference/gpu-configuration.md) | Driver selection, MIG strategy, and GPU Operator configuration | -| [AzureML Validation Job Debugging](./azureml-validation-job-debugging.md) | Debug AzureML extension and InstanceType validation failures | | [Deployment Validation](../contributing/deployment-validation.md) | Post-deployment verification steps | | [Cost Considerations](../contributing/cost-considerations.md) | Azure resource cost guidance | diff --git a/docs/operations/troubleshooting.md b/docs/operations/troubleshooting.md index 859aa199..6f2b9078 100644 --- a/docs/operations/troubleshooting.md +++ b/docs/operations/troubleshooting.md @@ -275,7 +275,6 @@ Convert all template expressions to Jinja syntax. For variable substitution, use ## Additional Resources - [GPU Configuration](../reference/gpu-configuration.md) -- [AzureML Validation Job Debugging](./azureml-validation-job-debugging.md) - [Security Guide](security-guide.md) - [Deployment Validation](../contributing/deployment-validation.md) - [NVIDIA CUDA Compatibility](https://docs.nvidia.com/deploy/cuda-compatibility/) diff --git a/docs/reference/README.md b/docs/reference/README.md index 397f3e5c..30fe8984 100644 --- a/docs/reference/README.md +++ b/docs/reference/README.md @@ -30,7 +30,6 @@ Technical reference documentation for submission scripts, workflow templates, en ## 📚 Related Documentation * [Training Guide](../training/README.md) for RL and behavioral cloning workflows -* [Inference Guide](../inference/README.md) for running trained policies * [Deployment Guide](https://github.com/microsoft/physical-ai-toolchain/blob/main/deploy/README.md) for infrastructure provisioning diff --git a/docs/training/README.md b/docs/training/README.md index fc66d27c..41198a9b 100644 --- a/docs/training/README.md +++ b/docs/training/README.md @@ -53,7 +53,6 @@ LeRobot behavioral cloning on OSMO: ## 📚 Related Documentation - [Deployment Guide](https://github.com/microsoft/physical-ai-toolchain/blob/main/deploy/README.md) for infrastructure setup -- [LeRobot Inference](../inference/lerobot-inference.md) for running trained policies - [AzureML Workflows](https://github.com/microsoft/physical-ai-toolchain/blob/main/workflows/azureml/README.md) for job template reference - [OSMO Workflows](https://github.com/microsoft/physical-ai-toolchain/blob/main/workflows/osmo/README.md) for workflow template reference - [Scripts Reference](../reference/scripts.md) for CLI usage diff --git a/docs/training/lerobot-training.md b/docs/training/lerobot-training.md index c5d08211..acfd7a25 100644 --- a/docs/training/lerobot-training.md +++ b/docs/training/lerobot-training.md @@ -208,7 +208,6 @@ The `run-lerobot-pipeline.sh` script orchestrates the full lifecycle on OSMO: ## 🔗 Related Documentation -- [LeRobot Inference](../inference/lerobot-inference.md) for evaluating trained policies - [Experiment Tracking](experiment-tracking.md) for MLflow and WANDB configuration - [AzureML Workflows](https://github.com/microsoft/physical-ai-toolchain/blob/main/workflows/azureml/README.md) for job template reference - [OSMO Workflows](https://github.com/microsoft/physical-ai-toolchain/blob/main/workflows/osmo/README.md) for workflow template reference diff --git a/docs/training/mlflow-integration.md b/docs/training/mlflow-integration.md index 75010239..07578c0b 100644 --- a/docs/training/mlflow-integration.md +++ b/docs/training/mlflow-integration.md @@ -282,7 +282,6 @@ Log messages like `"Failed to extract or log metrics at step X"` indicate transi ## Related Documentation * [Training Guide](README.md) -* [Inference Guide](../inference/README.md) * [Workflow Templates](https://github.com/microsoft/physical-ai-toolchain/blob/main/workflows/README.md) ---