diff --git a/.github/workflows/cd-cleanup.yml b/.github/workflows/cd-cleanup.yml index 7f1f2eb..39c34f6 100644 --- a/.github/workflows/cd-cleanup.yml +++ b/.github/workflows/cd-cleanup.yml @@ -14,6 +14,7 @@ permissions: jobs: scale-down: runs-on: ubuntu-latest + timeout-minutes: 10 # Per-matrix-env GitHub Environment so each iteration uses its own # AZURE_CLIENT_ID / AZURE_SUBSCRIPTION_ID vars + secrets. Without this, # both ci and ppe would resolve from whatever single environment was diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 391a003..a4b31b3 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -45,6 +45,7 @@ jobs: build-images: if: github.event_name == 'push' || github.event.inputs.imageTag == '' runs-on: ubuntu-latest + timeout-minutes: 45 permissions: contents: read packages: write @@ -153,6 +154,7 @@ jobs: always() && (needs.build-images.result == 'success' || needs.build-images.result == 'skipped') runs-on: ubuntu-latest + timeout-minutes: 45 outputs: imageTag: ${{ steps.tag.outputs.imageTag }} steps: @@ -184,7 +186,8 @@ jobs: with: environment: ci image_tag: ${{ needs.resolve-image-tag.outputs.imageTag }} - secrets: inherit + secrets: + AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} deploy-ppe: needs: [resolve-image-tag, deploy-ci] @@ -200,7 +203,8 @@ jobs: with: environment: ppe image_tag: ${{ needs.resolve-image-tag.outputs.imageTag }} - secrets: inherit + secrets: + AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} deploy-prod: needs: [resolve-image-tag, deploy-ppe] @@ -215,4 +219,5 @@ jobs: with: environment: prod image_tag: ${{ needs.resolve-image-tag.outputs.imageTag }} - secrets: inherit + secrets: + AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b54b6d4..b9e166c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,6 +17,7 @@ concurrency: jobs: build-test: runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -75,6 +76,7 @@ jobs: dependency-review: if: github.event_name == 'pull_request' runs-on: ubuntu-latest + timeout-minutes: 30 permissions: contents: read pull-requests: write @@ -87,6 +89,7 @@ jobs: bicep-lint: runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -149,6 +152,7 @@ jobs: # they merge. actionlint: runs-on: ubuntu-latest + timeout-minutes: 30 steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -163,6 +167,7 @@ jobs: # allowlists known false positives (Azure built-in RBAC role GUIDs). gitleaks: runs-on: ubuntu-latest + timeout-minutes: 30 permissions: contents: read steps: @@ -178,6 +183,7 @@ jobs: markdownlint: runs-on: ubuntu-latest + timeout-minutes: 30 permissions: contents: read steps: @@ -191,6 +197,7 @@ jobs: link-check: runs-on: ubuntu-latest + timeout-minutes: 30 permissions: contents: read issues: write diff --git a/.github/workflows/deploy-env.yml b/.github/workflows/deploy-env.yml index 75fc3e2..e1c7d06 100644 --- a/.github/workflows/deploy-env.yml +++ b/.github/workflows/deploy-env.yml @@ -46,6 +46,10 @@ on: apigateway_redirect: description: BFF OIDC redirect URI (for app-reg consent verification). value: ${{ jobs.deploy.outputs.apigateway_redirect }} + secrets: + AZURE_TENANT_ID: + description: Entra tenant id for OIDC federated login. Repo-level secret. + required: true permissions: contents: read @@ -53,6 +57,7 @@ permissions: jobs: deploy: runs-on: ubuntu-latest + timeout-minutes: 30 environment: ${{ inputs.environment }} concurrency: group: cd-deploy-${{ inputs.environment }} diff --git a/.github/workflows/pinact.yml b/.github/workflows/pinact.yml index 049cf33..87fd218 100644 --- a/.github/workflows/pinact.yml +++ b/.github/workflows/pinact.yml @@ -1,12 +1,7 @@ name: pinact -# Validates every third-party GitHub Action used in this repo is pinned to a -# *commit SHA* (not a mutable tag, and not an annotated-tag-object SHA either). -# Replaces the previous bespoke /tmp/pin.py script that mishandled annotated -# tags and produced unverifiable SHAs (see PR #79 follow-up). -# -# Tool: https://github.com/suzuki-shunsuke/pinact (industry standard). -# Action: https://github.com/suzuki-shunsuke/pinact-action +# Validate every 3rd-party GitHub Action is pinned to a commit SHA (not a mutable tag). +# Tool: https://github.com/suzuki-shunsuke/pinact on: pull_request: @@ -23,21 +18,15 @@ permissions: contents: read jobs: - verify: - name: Verify Action pins + pinact: + name: pinact runs-on: ubuntu-latest + timeout-minutes: 5 steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: suzuki-shunsuke/pinact-action@cf51507d80d4d6522a07348e3d58790290eaf0b6 # v2.0.0 with: - # Validate-only: do not auto-push fix commits. - # PRs from same-repo branches don't get contents:write on GITHUB_TOKEN, - # and on main, failing CI loudly is preferable to a silent "pinact" bot - # commit (author should run `pinact run --fix` locally instead). skip_push: "true" - # --check: fail if any action is unpinned (mutable ref). - # --verify: fail if pinned SHA does not match the version comment - # (catches the annotated-tag-object-SHA bug fixed in PR #87). pinact_opts: --check --verify env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/repo-rulesets.yml b/.github/workflows/repo-rulesets.yml index afd1013..1431841 100644 --- a/.github/workflows/repo-rulesets.yml +++ b/.github/workflows/repo-rulesets.yml @@ -34,6 +34,7 @@ permissions: jobs: drift: runs-on: ubuntu-latest + timeout-minutes: 5 steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index b3a2ce0..8bc763f 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -21,6 +21,7 @@ concurrency: jobs: analyze: runs-on: ubuntu-latest + timeout-minutes: 15 permissions: # Required for SARIF upload to code-scanning. security-events: write diff --git a/.github/workflows/wi-demo.yml b/.github/workflows/wi-demo.yml index b1ab3fc..2063e7f 100644 --- a/.github/workflows/wi-demo.yml +++ b/.github/workflows/wi-demo.yml @@ -14,6 +14,7 @@ permissions: jobs: acquire-token: runs-on: ubuntu-latest + timeout-minutes: 5 environment: demo steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 diff --git a/DOCTRINE.md b/DOCTRINE.md index 54c8fc8..055cd54 100644 --- a/DOCTRINE.md +++ b/DOCTRINE.md @@ -62,6 +62,20 @@ Everything else — language, framework, IaC, CI/CD, observability, FinOps, supp --- +## Authorization-policy doctrine (canonical) + +Source files reference this section instead of repeating the rationale inline. + +**The two-named-policy rule.** Resource APIs that accept both delegated and app callers expose **two** named policies (`*Delegated` for users, `*App` for S2S) — never an `OR` policy and never a single policy that ORs `scp` with `roles`. Each policy is mutually exclusive: a token carrying both `scp` and `roles` is rejected by both. Multi-tenant app-only resources expose one `*App` policy combining the role check, the `azp` allow-list, and `scp`-rejection. + +**Never `[Authorize(Roles = "...")]` on Microsoft.Identity.Web JWT schemes.** Those schemes set `MapInboundClaims = false` (defense-in-depth — see `EntraAuthServiceCollectionExtensions`), so the framework's role check looks for `ClaimTypes.Role` while Entra emits the role claim under the short name `roles`. The check silently fails: depending on what other `[Authorize]` attributes are present, this either locks everyone out or — worse — locks no one out. Always use the named-policy attributes (`[Authorize(Policy = OrdersAuthorizationPolicies.App)]`). + +**Why `azp` allow-list is required for app tokens.** `roles` proves *what* the caller wants to do; `azp` proves *who* the caller is. Without `azp` checking, any tenant admin who consents your app's app-role to *their own* malicious client can call you. The allow-list pins the set of client appIds you trust at the workload level, not the consent level. + +References: dotnet-engineering-guide ch02 §10, [`docs/decision-trees.md`](./docs/decision-trees.md) Tree 4, [`docs/validation.md`](./docs/validation.md) §4. + +--- + ## Sources - Microsoft.Identity.Web — [learn.microsoft.com/entra/identity-platform/microsoft-identity-web](https://learn.microsoft.com/entra/identity-platform/microsoft-identity-web) diff --git a/infra/bicep/modules/aca-stack.bicep b/infra/bicep/modules/aca-stack.bicep index 398ba56..fd03f05 100644 --- a/infra/bicep/modules/aca-stack.bicep +++ b/infra/bicep/modules/aca-stack.bicep @@ -29,22 +29,12 @@ param tags object = {} @description('Resolved Entra wiring (tenantId, app reg appIds, kitchen-worker MI clientId, downstream FQDNs). Empty `{}` on cold deploy → no Entra env vars are injected and apps fall back to appsettings.json placeholders. Populated by scripts/provision-apps.sh after Entra app regs and worker MI are known.') param entraConfig object = {} -// REQUIRED ORDER: services[0]=apiGateway, services[1]=ordersApi, -// services[2]=restaurantsApi, services[3]=kitchenWorker. -// The `services` output below indexes containerApps[] positionally because -// Bicep does not allow for-expressions inside `toObject(...)` for output -// values (BCP138) AND vars cannot reference module outputs (BCP182). Until -// that limitation is lifted (tracked as a follow-up to migrate to a true -// keyed-map output), callers MUST preserve both the length AND the order -// of the default array — overriding with a different ordering will silently -// produce a mis-keyed `services` map (e.g. apiGateway.fqdn pointing at the -// orders-api ingress). Bicep has no `assert` keyword, so this constraint -// is enforced by convention + review, not at template-evaluation time. -@description('Service definitions. project = csproj folder name; shortName = lowercase image/name suffix; key = stable Bicep map key (camelCase) used to dispatch env vars and build the output map; isWebApp = whether to expose HTTP ingress + /health/live + /health/ready probes. REQUIRED ORDER: [0]=apiGateway, [1]=ordersApi, [2]=restaurantsApi, [3]=kitchenWorker — the `services` output is positionally keyed against this ordering. Do NOT reorder or resize without also rewriting the `services` output map below.') +// REQUIRED ORDER: see `requiredOrder` in `services` param metadata below. +// The output map is positionally keyed; reordering breaks downstream wiring. +@description('Service definitions. project = csproj folder; shortName = lowercase image suffix; key = camelCase Bicep map key; isWebApp = expose HTTP ingress + probes. Position-keyed against the `services` output — preserve order and length.') @metadata({ requiredOrder: [ 'apiGateway', 'ordersApi', 'restaurantsApi', 'kitchenWorker' ] requiredLength: 4 - orderingConstraint: 'The `services` output below is positionally keyed against this array (containerApps[0]=apiGateway, etc). Reordering or resizing produces a silently mis-keyed output — do not override unless you also rewrite the output map.' }) param services array = [ { project: 'Ftgo.ApiGateway', shortName: 'apigateway', key: 'apiGateway', isWebApp: true } diff --git a/infra/bicep/modules/app-registrations.bicep b/infra/bicep/modules/app-registrations.bicep index 4078748..83d2912 100644 --- a/infra/bicep/modules/app-registrations.bicep +++ b/infra/bicep/modules/app-registrations.bicep @@ -1,18 +1,11 @@ metadata name = 'app-registrations' metadata description = 'Provisions the 3 FTGO Entra app registrations + service principals (BFF, Orders API, Restaurants API) and exposes deterministic scope/role IDs. Workers run as Managed Identity and do not need their own app reg.' -// IMPORTANT: bffMiClientId must NEVER be passed as empty on a re-run after the -// initial warm deploy. The FIC resource depends on this value as its 'subject'. -// If you re-run with an empty bffMiClientId, the FIC will be deleted, causing -// the BFF to lose its trust relationship with the BFF UAMI. The bootstrap -// flow (scripts/provision-apps.sh) must always pass the populated value. -// -// Cold-deploy semantics (bffMiClientId == '') are intentional and safe ONLY -// before the BFF Container App + its system-assigned MI exist. Once the FIC -// has been created, every subsequent tenant-scope deploy MUST resolve and -// pass the BFF MI clientId or the Microsoft.Graph extension will reconcile -// the FIC out of existence and break SignedAssertionFromManagedIdentity for -// the BFF until the next provision-apps.sh run. +// IMPORTANT: bffMiClientId must NEVER be empty after the BFF MI exists — the +// Microsoft.Graph extension would reconcile the FIC out of existence and break +// SignedAssertionFromManagedIdentity. Cold deploy passes '' intentionally; +// scripts/provision-apps.sh handles the resolve+rerun. Recovery: re-run +// provision-apps.sh. Full procedure: docs/operations.md § "CD identity (UAMI) recovery". // Scope/role IDs are deterministic GUIDs of (tenantId, app, value) so callers' references survive re-deploys. diff --git a/infra/bicep/modules/cd-bootstrap.bicep b/infra/bicep/modules/cd-bootstrap.bicep index 4e21594..d877fc7 100644 --- a/infra/bicep/modules/cd-bootstrap.bicep +++ b/infra/bicep/modules/cd-bootstrap.bicep @@ -37,24 +37,8 @@ resource fic 'Microsoft.ManagedIdentity/userAssignedIdentities/federatedIdentity } } -// Deterministic GUID → idempotent re-runs (same scope+principal+role → same name). -// -// NOTE: Role assignments are scoped to the UAMI's principalId at creation. -// If you delete and recreate the CD UAMI with the same name, the principalId -// changes; the OLD role assignments will linger and the deployment will fail -// with 'role assignment already exists'. Manually delete the old assignments -// before re-running. See operations.md § for the recovery -// procedure. -// -// Concretely: `mi.id` (resourceId) stays stable across delete+recreate -// because it is derived from name+RG, so `guid(rg.id, mi.id, roleId)` — -// which we use as the assignment name — also stays stable. ARM keys role -// assignments by name (immutable principalId on the existing record), so -// the redeploy attempts to update an immutable field and 409s. -// -// Recovery: `az role assignment delete --assignee ` (or -// `az role assignment list --scope --query "[?principalId==''].id" -// | xargs -n1 az role assignment delete --ids`) then re-run bootstrap.bicep. +// Deterministic name → idempotent re-runs. Recovery procedure for the +// "principalId changed after MI recreate" 409 → docs/operations.md § "CD identity (UAMI) recovery". resource roleAssignments 'Microsoft.Authorization/roleAssignments@2022-04-01' = [for roleId in roleIds: { name: guid(resourceGroup().id, mi.id, roleId) properties: { diff --git a/infra/bicep/modules/container-app.bicep b/infra/bicep/modules/container-app.bicep index 9773b9e..f8be212 100644 --- a/infra/bicep/modules/container-app.bicep +++ b/infra/bicep/modules/container-app.bicep @@ -47,7 +47,7 @@ param extraEnvVars array = [] var aspNetCoreEnvironment = '${toUpper(substring(environmentName, 0, 1))}${substring(environmentName, 1)}' -resource app 'Microsoft.App/containerApps@2024-10-02-preview' = { +resource app 'Microsoft.App/containerApps@2025-07-01' = { name: appName location: location tags: tags diff --git a/infra/bicep/modules/container-apps-environment.bicep b/infra/bicep/modules/container-apps-environment.bicep index 879665f..84aadd9 100644 --- a/infra/bicep/modules/container-apps-environment.bicep +++ b/infra/bicep/modules/container-apps-environment.bicep @@ -19,7 +19,7 @@ resource law 'Microsoft.OperationalInsights/workspaces@2023-09-01' existing = { name: logAnalyticsWorkspaceName } -resource cae 'Microsoft.App/managedEnvironments@2024-10-02-preview' = { +resource cae 'Microsoft.App/managedEnvironments@2025-07-01' = { name: name location: location tags: tags diff --git a/infra/bicep/modules/key-vault-rbac.bicep b/infra/bicep/modules/key-vault-rbac.bicep index 0982190..70b7c10 100644 --- a/infra/bicep/modules/key-vault-rbac.bicep +++ b/infra/bicep/modules/key-vault-rbac.bicep @@ -13,7 +13,7 @@ param principalIds array // Reference: https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#key-vault-secrets-user var keyVaultSecretsUserRoleId = '4633458b-17de-408a-b874-0445c86b69e6' -resource kv 'Microsoft.KeyVault/vaults@2024-04-01-preview' existing = { +resource kv 'Microsoft.KeyVault/vaults@2024-11-01' existing = { name: keyVaultName } diff --git a/infra/bicep/modules/key-vault.bicep b/infra/bicep/modules/key-vault.bicep index 4e97ea1..d0f71bb 100644 --- a/infra/bicep/modules/key-vault.bicep +++ b/infra/bicep/modules/key-vault.bicep @@ -28,7 +28,7 @@ param softDeleteRetentionInDays int = 7 @description('When true, the vault cannot be purged before retention expires (irreversible). Required for prod compliance; off in non-prod so churned environments do not pile up undeletable vaults.') param enablePurgeProtection bool = false -resource kv 'Microsoft.KeyVault/vaults@2024-04-01-preview' = { +resource kv 'Microsoft.KeyVault/vaults@2024-11-01' = { name: name location: location tags: tags diff --git a/nuget.config b/nuget.config new file mode 100644 index 0000000..128d95e --- /dev/null +++ b/nuget.config @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/scripts/bootstrap-env.sh b/scripts/bootstrap-env.sh index b76b11f..f999c3f 100755 --- a/scripts/bootstrap-env.sh +++ b/scripts/bootstrap-env.sh @@ -1,29 +1,13 @@ #!/usr/bin/env bash -# scripts/bootstrap-env.sh — one-time per-environment bootstrap for the cloud CD pipeline. +# bootstrap-env.sh — one-time per-tier bootstrap (RG + UAMI + FIC + RBAC + GH env). +# Idempotent. See docs/operations.md § "Provisioning a brand-new tier". # -# Azure side (declarative): infra/bicep/bootstrap.bicep deploys -# 1. Resource group rg-ftgo-${ENV}-eastus -# 2. User-assigned MI ftgo-${ENV}-cd-mi -# 3. Federated credential github-${ENV} (subject: repo:OWNER/REPO:environment:ENV) -# 4. RBAC Contributor on the RG (+ User Access Admin for prod) -# -# GitHub side (imperative — outside Azure ARM): -# 5. GitHub Environment ${ENV} (with required reviewer for prod) -# 6. GH env vars AZURE_CLIENT_ID, AZURE_SUBSCRIPTION_ID -# 7. Repo secret AZURE_TENANT_ID (one-time, shared across envs) -# -# Idempotent: safe to re-run. Bicep deployment uses deterministic names; gh PUT -# semantics upsert. -# -# Usage: -# ./scripts/bootstrap-env.sh ENV=ci -# ./scripts/bootstrap-env.sh ENV=ppe -# ./scripts/bootstrap-env.sh ENV=prod -# -# Prereqs: bash 4+, az CLI logged in to the target subscription with Owner role, -# gh CLI authenticated to the repo, jq. +# Usage: ./scripts/bootstrap-env.sh ENV=ci|ppe|prod +# Prereqs: bash 4+, az CLI (Owner), gh CLI, jq. set -euo pipefail +IFS=$'\n\t' +trap 'echo "FATAL: $(basename "$0") failed on line $LINENO" >&2; exit 1' ERR if (( BASH_VERSINFO[0] < 4 )); then echo "ERROR: bash 4+ required (you have ${BASH_VERSION})." >&2 @@ -35,7 +19,7 @@ ENV="" for arg in "$@"; do case "$arg" in ENV=*) ENV="${arg#ENV=}" ;; - -h|--help) sed -n '2,24p' "$0" | sed 's/^# \?//'; exit 0 ;; + -h|--help) sed -n '2,7p' "$0" | sed 's/^# \?//'; exit 0 ;; *) echo "unknown arg: $arg (expected ENV=ci|ppe|prod)" >&2; exit 2 ;; esac done diff --git a/scripts/provision-apps.sh b/scripts/provision-apps.sh index 4c7ab59..2640d44 100755 --- a/scripts/provision-apps.sh +++ b/scripts/provision-apps.sh @@ -1,41 +1,18 @@ #!/usr/bin/env bash -# scripts/provision-apps.sh — single deployment entrypoint for cloud envs (ci|ppe|prod). -# -# Run this MANUALLY (requires Owner at root scope to write app regs + repo admin to write -# GitHub vars) on a fresh env or whenever Entra app regs change. CD redeploys then pick -# up the resolved entraConfig from the GitHub env-level variable and pass it through to -# bicep — env vars persist across pushes (no more drift). -# -# Workflow: -# -# 1. (cold only) Deploy infra/bicep/azure.bicep with entraConfig={} to create the -# Container Apps + system MIs. Skipped when the kitchen-worker container app -# already exists. -# 2. Read each container app's system-assigned MI principalId AND resolve the BFF -# MI's clientId (subject of the BFF federated credential). -# 3. Deploy infra/bicep/main.bicep at tenant scope to (re-)create the per-env Entra -# app registrations, grant Orders.Process to the kitchen-worker MI's principalId, -# AND create the BFF federated identity credential (subject = BFF MI clientId, -# audience = api://AzureADTokenExchange) — all declaratively via the Microsoft.Graph -# Bicep extension. Idempotent (matches by uniqueName). -# 4. Resolve the kitchen-worker MI's appId (clientId), used in OrdersApi's -# EntraAuth__AllowedClientApps allow-list. -# 5. Re-deploy infra/bicep/azure.bicep with a populated entraConfig object. ARM merges -# the env-var changes into the container app templates declaratively. -# 6. Publish entraConfig as the ENTRA_CONFIG_JSON env-level GitHub variable so cd.yml -# can re-pass it on subsequent CD redeploys. -# -# IMAGE_TAG: optional; defaults to `latest`. +# provision-apps.sh — single deploy entrypoint for cloud tiers (ci|ppe|prod). +# Six-step cold/warm flow (azure → tenant → entra → wire-back). Idempotent. +# Full procedure: docs/deploy-cloud.md. # # Usage: # ./scripts/provision-apps.sh ENV=ci # IMAGE_TAG=sha-abc1234 ./scripts/provision-apps.sh ENV=ci -# LOCATION=westeurope ./scripts/provision-apps.sh ENV=ci # override region (default: eastus) -# WHAT_IF=1 ./scripts/provision-apps.sh ENV=ppe # preview only; no resource changes +# LOCATION=westeurope WHAT_IF=1 ./scripts/provision-apps.sh ENV=ppe # -# Prereqs: bash 4+, az CLI logged in, gh CLI authenticated, jq. +# Prereqs: bash 4+, az CLI (Owner at root), gh CLI (repo admin), jq. set -euo pipefail +IFS=$'\n\t' +trap 'echo "FATAL: $(basename "$0") failed on line $LINENO" >&2; exit 1' ERR if (( BASH_VERSINFO[0] < 4 )); then echo "ERROR: bash 4+ required (you have ${BASH_VERSION})." >&2 @@ -47,7 +24,7 @@ for arg in "$@"; do case "$arg" in ENV=*) ENV="${arg#ENV=}" ;; IMAGE_TAG=*) IMAGE_TAG="${arg#IMAGE_TAG=}" ;; - -h|--help) sed -n '2,38p' "$0" | sed 's/^# \?//'; exit 0 ;; + -h|--help) sed -n '2,11p' "$0" | sed 's/^# \?//'; exit 0 ;; *) echo "unknown arg: $arg (expected ENV=ci|ppe|prod [IMAGE_TAG=...])" >&2; exit 2 ;; esac done @@ -98,7 +75,8 @@ deploy_azure_bicep() { # Status messages → stderr; deployment name → stdout (so caller can capture). local entra_config_json="$1" local stage="$2" - local name="ftgo-${ENV}-$(date -u +%Y%m%d%H%M%S)-${stage}" + local name + name="ftgo-${ENV}-$(date -u +%Y%m%d%H%M%S)-${stage}" echo " deploying azure.bicep (name=$name, entraConfig=$([[ "$entra_config_json" == "{}" ]] && echo empty || echo populated))" >&2 if [[ "${WHAT_IF:-0}" == "1" ]]; then echo " WHAT_IF=1 — preview only, skipping create" >&2 diff --git a/src/Ftgo.ApiGateway/Program.cs b/src/Ftgo.ApiGateway/Program.cs index 92965d2..203e48b 100644 --- a/src/Ftgo.ApiGateway/Program.cs +++ b/src/Ftgo.ApiGateway/Program.cs @@ -3,6 +3,8 @@ using Microsoft.Identity.Web; var builder = WebApplication.CreateBuilder(args); +builder.WebHost.ConfigureEntraAuthKestrel(); +builder.Services.AddEntraAuthHttpsRedirection(); builder.Services.AddEntraAuth(builder.Configuration, auth => { @@ -23,8 +25,7 @@ var app = builder.Build(); app.UseForwardedHeaders(); -// API gateway serves the Scalar HTML UI; use the Scalar-friendly CSP preset so inline scripts/styles -// + the in-browser Auth Code + PKCE call to login.microsoftonline.com are not blocked. +app.UseHttpsRedirection(); app.UseEntraAuthSecurityHeaders(EntraAuthSecurityHeaderOptions.ScalarFriendly()); app.UseEntraAuthProblemDetails(); app.UseAuthentication(); diff --git a/src/Ftgo.Auth/EntraAuthHostingExtensions.cs b/src/Ftgo.Auth/EntraAuthHostingExtensions.cs new file mode 100644 index 0000000..b248b1b --- /dev/null +++ b/src/Ftgo.Auth/EntraAuthHostingExtensions.cs @@ -0,0 +1,33 @@ +using Microsoft.AspNetCore.Builder; +using Microsoft.AspNetCore.Hosting; +using Microsoft.AspNetCore.Http; +using Microsoft.Extensions.DependencyInjection; + +namespace Ftgo.Auth; + +/// Hardened Kestrel + HTTPS-redirect defaults. +/// Defense-in-depth alongside the platform's own ingress (e.g. ACA enforces HTTPS at the edge). +public static class EntraAuthHostingExtensions +{ + public const long DefaultMaxRequestBodyBytes = 10L * 1024L * 1024L; + + /// Pins Kestrel limits suitable for a JSON-only API workload (10 MB body cap, 30 s headers, 65 s keep-alive). + public static IWebHostBuilder ConfigureEntraAuthKestrel(this IWebHostBuilder webHost) + { + ArgumentNullException.ThrowIfNull(webHost); + return webHost.ConfigureKestrel(options => + { + options.Limits.MaxRequestBodySize = DefaultMaxRequestBodyBytes; + options.Limits.RequestHeadersTimeout = TimeSpan.FromSeconds(30); + options.Limits.KeepAliveTimeout = TimeSpan.FromSeconds(65); + options.AddServerHeader = false; + }); + } + + /// Registers a permanent (308) HTTPS redirect for any non-HTTPS request that bypasses platform ingress. + public static IServiceCollection AddEntraAuthHttpsRedirection(this IServiceCollection services) + { + ArgumentNullException.ThrowIfNull(services); + return services.AddHttpsRedirection(o => o.RedirectStatusCode = StatusCodes.Status308PermanentRedirect); + } +} diff --git a/src/Ftgo.Auth/EntraAuthRateLimiterExtensions.cs b/src/Ftgo.Auth/EntraAuthRateLimiterExtensions.cs index b31f2f9..062c231 100644 --- a/src/Ftgo.Auth/EntraAuthRateLimiterExtensions.cs +++ b/src/Ftgo.Auth/EntraAuthRateLimiterExtensions.cs @@ -36,6 +36,14 @@ public static IServiceCollection AddEntraAuthRateLimiter( var opts = new EntraAuthRateLimiterOptions(); configure?.Invoke(opts); + var validation = new EntraAuthRateLimiterOptionsValidator().Validate(name: null, opts); + if (validation.Failed) + { + throw new ArgumentException( + $"{nameof(EntraAuthRateLimiterOptions)} is invalid: {string.Join("; ", validation.Failures ?? Array.Empty())}", + nameof(configure)); + } + services.AddRateLimiter(rl => { rl.RejectionStatusCode = StatusCodes.Status429TooManyRequests; @@ -124,18 +132,36 @@ internal static (string Key, bool IsApp) PartitionKeyAndKind(HttpContext ctx) } /// Knobs for . -/// Defaults: 100 req / 60 s / 6-segment sliding window for users; 1000 req / 60 s for app-only callers. -/// Cite ASP.NET Core docs: . +/// Defaults: 100 req / 60 s / 6-segment sliding window for users; 1000 req / 60 s for app-only callers. public sealed class EntraAuthRateLimiterOptions { /// Permit ceiling for a delegated-user partition (or anonymous IP partition). public int PermitLimit { get; set; } = 100; /// Permit ceiling for an app-only (service-principal) partition. Defaults to 10× - /// because a single app identity often fronts many concurrent end-users (gateway → downstream API, - /// worker → API). Sharing the per-user budget across them would self-throttle normal traffic. + /// because one service principal often fronts many end-users (gateway → downstream, worker → API). public int AppPermitLimit { get; set; } = 1000; public TimeSpan Window { get; set; } = TimeSpan.FromSeconds(60); public int SegmentsPerWindow { get; set; } = 6; } + +internal sealed class EntraAuthRateLimiterOptionsValidator : Microsoft.Extensions.Options.IValidateOptions +{ + public Microsoft.Extensions.Options.ValidateOptionsResult Validate(string? name, EntraAuthRateLimiterOptions options) + { + ArgumentNullException.ThrowIfNull(options); + var failures = new List(); + if (options.PermitLimit <= 0) + failures.Add($"{nameof(EntraAuthRateLimiterOptions.PermitLimit)} must be > 0 (got {options.PermitLimit})."); + if (options.AppPermitLimit <= 0) + failures.Add($"{nameof(EntraAuthRateLimiterOptions.AppPermitLimit)} must be > 0 (got {options.AppPermitLimit})."); + if (options.Window <= TimeSpan.Zero) + failures.Add($"{nameof(EntraAuthRateLimiterOptions.Window)} must be > 0 (got {options.Window})."); + if (options.SegmentsPerWindow <= 0) + failures.Add($"{nameof(EntraAuthRateLimiterOptions.SegmentsPerWindow)} must be > 0 (got {options.SegmentsPerWindow})."); + return failures.Count == 0 + ? Microsoft.Extensions.Options.ValidateOptionsResult.Success + : Microsoft.Extensions.Options.ValidateOptionsResult.Fail(failures); + } +} diff --git a/src/Ftgo.Orders.Api/OrdersAuthorizationPolicies.cs b/src/Ftgo.Orders.Api/OrdersAuthorizationPolicies.cs index 3d68429..dd3782c 100644 --- a/src/Ftgo.Orders.Api/OrdersAuthorizationPolicies.cs +++ b/src/Ftgo.Orders.Api/OrdersAuthorizationPolicies.cs @@ -1,21 +1,11 @@ namespace Ftgo.Orders.Api; -/// -/// Named authorization policy identifiers for Orders.Api. -/// -/// -/// Doctrine: APIs that accept both delegated and app callers expose two separate named -/// policies — never an OR-claims policy and never a single policy that ORs scp with -/// roles. Each policy is mutually exclusive with the other (a token with both scp -/// and roles is rejected by both). See dotnet-engineering-guide ch02 §10 and -/// decision-trees.md -/// Tree 4. -/// +/// Named authorization policy identifiers for Orders.Api. See DOCTRINE.md § "Authorization-policy doctrine". public static class OrdersAuthorizationPolicies { - /// Delegated (user-token, OBO) callers — requires scp=orders.read and rejects tokens with roles. + /// Delegated (user-token, OBO) — requires scp=orders.read and rejects tokens carrying roles. public const string Delegated = "OrdersDelegated"; - /// App-only (S2S) callers — requires roles=Orders.Process, an allow-listed azp, and rejects tokens with scp. + /// App-only (S2S) — requires roles=Orders.Process, allow-listed azp, and rejects tokens carrying scp. public const string App = "OrdersApp"; } diff --git a/src/Ftgo.Orders.Api/Program.cs b/src/Ftgo.Orders.Api/Program.cs index 9a15ea9..bdcfbad 100644 --- a/src/Ftgo.Orders.Api/Program.cs +++ b/src/Ftgo.Orders.Api/Program.cs @@ -3,6 +3,8 @@ using Scalar.AspNetCore; var builder = WebApplication.CreateBuilder(args); +builder.WebHost.ConfigureEntraAuthKestrel(); +builder.Services.AddEntraAuthHttpsRedirection(); builder.Services.AddEntraAuth(builder.Configuration); builder.Services.AddEntraAuthWebTelemetry("Ftgo.Orders.Api"); builder.Services.AddEntraAuthProblemDetails(); @@ -10,9 +12,6 @@ builder.Services.AddControllers(); builder.Services.AddHealthChecks(); -// Doctrine: split delegated vs app into two named, mutually-exclusive policies. -// Never accept tokens that carry both `scp` and `roles` — that means a misconfigured app -// registration minted a mixed token. See eng-guide ch02 §10 / decision-trees.md Tree 4. builder.Services.AddAuthorization(o => { o.AddDelegatedPolicy(OrdersAuthorizationPolicies.Delegated, "orders.read"); @@ -24,6 +23,7 @@ var app = builder.Build(); app.UseForwardedHeaders(); +app.UseHttpsRedirection(); app.UseEntraAuthSecurityHeaders(EntraAuthSecurityHeaderOptions.ScalarFriendly()); app.UseEntraAuthProblemDetails(); app.UseAuthentication(); diff --git a/src/Ftgo.Restaurants.Api/Program.cs b/src/Ftgo.Restaurants.Api/Program.cs index 410a1e4..96060a8 100644 --- a/src/Ftgo.Restaurants.Api/Program.cs +++ b/src/Ftgo.Restaurants.Api/Program.cs @@ -3,6 +3,8 @@ using Scalar.AspNetCore; var builder = WebApplication.CreateBuilder(args); +builder.WebHost.ConfigureEntraAuthKestrel(); +builder.Services.AddEntraAuthHttpsRedirection(); builder.Services.AddEntraAuth(builder.Configuration); builder.Services.AddEntraAuthWebTelemetry("Ftgo.Restaurants.Api"); builder.Services.AddEntraAuthProblemDetails(); @@ -10,11 +12,6 @@ builder.Services.AddControllers(); builder.Services.AddHealthChecks(); -// Doctrine: app-only multi-tenant resource API. One named policy combining -// (a) Restaurants.Read.All app-role, (b) azp allow-list, (c) rejection of any token -// that also carries `scp`. Never use [Authorize(Roles=...)] when MapInboundClaims=false — -// the framework looks for ClaimTypes.Role, not the short `roles` name Entra emits. -// See eng-guide ch02 §10 / decision-trees.md Tree 4. builder.Services.AddAuthorization(o => { o.AddAppPolicy(RestaurantsAuthorizationPolicies.App, "Restaurants.Read.All"); @@ -25,6 +22,7 @@ var app = builder.Build(); app.UseForwardedHeaders(); +app.UseHttpsRedirection(); app.UseEntraAuthSecurityHeaders(EntraAuthSecurityHeaderOptions.ScalarFriendly()); app.UseEntraAuthProblemDetails(); app.UseAuthentication(); diff --git a/src/Ftgo.Restaurants.Api/RestaurantsAuthorizationPolicies.cs b/src/Ftgo.Restaurants.Api/RestaurantsAuthorizationPolicies.cs index 1e5418e..fa1882e 100644 --- a/src/Ftgo.Restaurants.Api/RestaurantsAuthorizationPolicies.cs +++ b/src/Ftgo.Restaurants.Api/RestaurantsAuthorizationPolicies.cs @@ -1,23 +1,8 @@ namespace Ftgo.Restaurants.Api; -/// -/// Named authorization policy identifiers for Restaurants.Api. -/// -/// -/// Doctrine: app-only multi-tenant resource APIs expose one named policy that combines -/// the app-role check with the azp allow-list and rejects any token that also -/// carries scp. Never use [Authorize(Roles = "...")] on a JWT scheme that -/// has MapInboundClaims = false — the framework looks for claims of type -/// ClaimTypes.Role, but Entra emits the role claim under the short name roles, -/// so the role check would silently fail and either lock everyone out or (worse) lock no -/// one out depending on what other Authorize attributes are also applied. See -/// dotnet-engineering-guide ch02 §10 and -/// decision-trees.md -/// Tree 4. -/// +/// Named authorization policy identifiers for Restaurants.Api. See DOCTRINE.md § "Authorization-policy doctrine". public static class RestaurantsAuthorizationPolicies { - /// App-only (S2S, multi-tenant) callers — requires roles=Restaurants.Read.All, - /// an allow-listed azp, and rejects tokens with scp. + /// App-only (S2S, multi-tenant) — requires roles=Restaurants.Read.All, allow-listed azp, and rejects tokens carrying scp. public const string App = "RestaurantsApp"; } diff --git a/tests/Ftgo.Auth.Tests/DownstreamProbeServiceTests.cs b/tests/Ftgo.Auth.Tests/DownstreamProbeServiceTests.cs new file mode 100644 index 0000000..beebcab --- /dev/null +++ b/tests/Ftgo.Auth.Tests/DownstreamProbeServiceTests.cs @@ -0,0 +1,88 @@ +using System.Net; +using Ftgo.Auth; +using Microsoft.Extensions.Hosting; +using Microsoft.Extensions.Logging.Abstractions; +using Microsoft.Extensions.Options; +using Shouldly; +using Xunit; + +namespace Ftgo.Auth.Tests; + +public sealed class DownstreamProbeServiceTests +{ + [Fact] + public async Task ExecuteAsync_LogsResult_AndStopsHost_OnSuccess() + { + var lifetime = new FakeLifetime(); + var handler = new StaticResponseHandler(new HttpResponseMessage(HttpStatusCode.OK) { Content = new StringContent("ok") }); + var client = BuildClient(handler); + + var svc = new DownstreamProbeService( + client, + Options.Create(new DownstreamApiOptions { BaseUrl = "https://api.test/", Scope = "api/.default", ProbePath = "/health/ready" }), + lifetime, + NullLogger.Instance); + + await svc.StartAsync(TestContext.Current.CancellationToken); + await svc.ExecuteTask!; + + lifetime.StopRequested.ShouldBeTrue(); + } + + [Fact] + public async Task ExecuteAsync_SetsExitCode1_AndStopsHost_OnException() + { + Environment.ExitCode = 0; + var lifetime = new FakeLifetime(); + var client = BuildClient(new ThrowingHandler()); + + var svc = new DownstreamProbeService( + client, + Options.Create(new DownstreamApiOptions { BaseUrl = "https://api.test/", Scope = "api/.default", ProbePath = "/health/ready" }), + lifetime, + NullLogger.Instance); + + await svc.StartAsync(TestContext.Current.CancellationToken); + await svc.ExecuteTask!; + + Environment.ExitCode.ShouldBe(1); + lifetime.StopRequested.ShouldBeTrue(); + Environment.ExitCode = 0; + } + + // Cancellation behaviour is exercised implicitly via StopAsync at host shutdown — the + // BackgroundService swallows OperationCanceledException when stoppingToken fires (see + // DownstreamProbeService:23). Skip a dedicated test: it relies on framework-internal timing + // and adds noise without catching real regressions. + + private static DownstreamApiClient BuildClient(HttpMessageHandler handler) => + new(new HttpClient(handler) { BaseAddress = new Uri("https://api.test/") }, + new StubTokenProvider("stub-token"), + NullLogger.Instance); + + private sealed class StubTokenProvider(string token) : IAppTokenProvider + { + public ValueTask GetAccessTokenAsync(string scope, CancellationToken cancellationToken) => new(token); + } + + private sealed class FakeLifetime : IHostApplicationLifetime + { + public CancellationToken ApplicationStarted => CancellationToken.None; + public CancellationToken ApplicationStopped => CancellationToken.None; + public CancellationToken ApplicationStopping => CancellationToken.None; + public bool StopRequested { get; private set; } + public void StopApplication() => StopRequested = true; + } + + private sealed class StaticResponseHandler(HttpResponseMessage response) : HttpMessageHandler + { + protected override Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) => + Task.FromResult(response); + } + + private sealed class ThrowingHandler : HttpMessageHandler + { + protected override Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) => + throw new HttpRequestException("downstream unreachable"); + } +} diff --git a/tests/Ftgo.Auth.Tests/EntraAuthRateLimiterOptionsValidatorTests.cs b/tests/Ftgo.Auth.Tests/EntraAuthRateLimiterOptionsValidatorTests.cs new file mode 100644 index 0000000..7216576 --- /dev/null +++ b/tests/Ftgo.Auth.Tests/EntraAuthRateLimiterOptionsValidatorTests.cs @@ -0,0 +1,49 @@ +using Ftgo.Auth; +using Microsoft.Extensions.DependencyInjection; +using Shouldly; +using Xunit; + +namespace Ftgo.Auth.Tests; + +public sealed class EntraAuthRateLimiterOptionsValidatorTests +{ + [Fact] + public void AddEntraAuthRateLimiter_Throws_WhenPermitLimit_NonPositive() + { + var services = new ServiceCollection(); + + var ex = Should.Throw(() => + services.AddEntraAuthRateLimiter(o => o.PermitLimit = 0)); + + ex.Message.ShouldContain(nameof(EntraAuthRateLimiterOptions.PermitLimit)); + } + + [Fact] + public void AddEntraAuthRateLimiter_Throws_WhenAppPermitLimit_NonPositive() + { + var services = new ServiceCollection(); + + var ex = Should.Throw(() => + services.AddEntraAuthRateLimiter(o => o.AppPermitLimit = -1)); + + ex.Message.ShouldContain(nameof(EntraAuthRateLimiterOptions.AppPermitLimit)); + } + + [Fact] + public void AddEntraAuthRateLimiter_Throws_WhenWindow_Zero() + { + var services = new ServiceCollection(); + + var ex = Should.Throw(() => + services.AddEntraAuthRateLimiter(o => o.Window = TimeSpan.Zero)); + + ex.Message.ShouldContain(nameof(EntraAuthRateLimiterOptions.Window)); + } + + [Fact] + public void AddEntraAuthRateLimiter_Succeeds_WithDefaults() + { + var services = new ServiceCollection(); + Should.NotThrow(() => services.AddEntraAuthRateLimiter()); + } +}