diff --git a/dev-docs/.vitepress/config.ts b/dev-docs/.vitepress/config.ts index 8c4b20f5..29fea894 100644 --- a/dev-docs/.vitepress/config.ts +++ b/dev-docs/.vitepress/config.ts @@ -80,6 +80,7 @@ export default withMermaid(defineConfig({ { text: '⭐ Federation v1 — Implementation Spec', link: '/future-features/federation-v1-design' }, { text: '⭐ Account Lifecycle — Email + Deletion Plan', link: '/future-features/lifecycle-email-deletion-plan' }, { text: '⭐ Production-Readiness Audit 2026-05-13', link: '/future-features/production-readiness-audit-2026-05-13' }, + { text: '⭐ Logging & Audit Redesign', link: '/future-features/logging-audit-redesign' }, { text: 'HA / Multi-Instance Readiness', link: '/future-features/ha-multi-instance' }, { text: 'Realm Backup / Restore / DR', link: '/future-features/realm-backup-restore' }, { text: 'Enterprise SSO — SAML + LDAP', link: '/future-features/enterprise-sso-saml-ldap' }, @@ -113,6 +114,7 @@ export default withMermaid(defineConfig({ { text: 'Overview', link: '/architecture/' }, { text: 'Authentication slice', link: '/architecture/authentication' }, { text: 'Authorization slice', link: '/architecture/authorization' }, + { text: 'Persistence model (ES vs. flat docs)', link: '/architecture/persistence-model' }, ], }, { diff --git a/dev-docs/architecture/persistence-model.md b/dev-docs/architecture/persistence-model.md new file mode 100644 index 00000000..8ce0a9fd --- /dev/null +++ b/dev-docs/architecture/persistence-model.md @@ -0,0 +1,53 @@ +# Persistence model — event sourcing vs. flat documents + +> **Status: accepted (2026-06-04).** Modgud is a *hybrid* persistence system by design — event-sourced aggregates **and** flat Marten documents in the same store. This page records when to reach for which on a **new** feature, and the discipline that keeps the hybrid safe as the app grows. 
**Decision: keep the existing model as-is (no rebuild); choose best-fit per feature going forward.** + +## Why this exists + +The IAM core was rebuilt on an event-sourced foundation (Marten), and event sourcing (ES) carries a real, recurring tax: projection-vs-aggregation modelling, no cheap targeted per-stream rebuild, source-gen for every `Apply`/`Create`, subclass registration in two places, event evolution via tolerant JSON, and — the big one — the tension between an append-only log and GDPR erasure (mask-bytes-in-place + archive + scrub-the-projection, instead of a plain `DELETE`). That tax is worth paying where a feature genuinely needs ES, and pure overhead where it does not. This doc stops "we event-source here because we event-source everywhere" from becoming the default. + +## The decision + +**Default to a flat Marten document.** Reach for an event-sourced aggregate only when the feature needs at least one of: + +| Trigger | Example | +|---|---| +| **History / "who changed what, when" is a first-class requirement** | the user-aggregate audit trail; OAuth client config changes | +| **Non-trivial invariants / a real state machine** | account lifecycle (active → locked → deactivated → deleted), OAuth grant state | +| **Temporal queries / rebuildable read models** | "what did this look like at time X"; projecting one stream into several views | + +If none of these apply — settings, lookups, associations, caches, ephemeral challenges, the streamless security log — use a **flat document**. The friction of ES buys nothing there. + +This is *already* how Modgud is built: the user and OAuth aggregates are event-sourced; `ApplicationUser`, `UserSecurityData`, sessions, external links, passkeys, `RealmSettings`, and the streamless `SecurityAuditEntry` store are flat documents. "Best fit per feature" is not a new direction — it is the existing de-facto architecture, written down. 
+ +## The safety rules (what keeps a hybrid from breaking) + +The recurring worry with mixing ES and plain CRUD is: *a stored event references an entity that lives in another store and was hard-deleted — does replay/projection then dangle?* This is a **design-discipline** problem, not an ES-vs-CRUD problem (any system with references and deletes must answer it — even pure CRUD chooses cascade vs. set-null per foreign key). ES only forces you to confront it earlier, because replay asks "and if the target is gone?". The rules: + +1. **Cross-boundary references are IDs, resolved at read time, and must tolerate absence.** A projection or read endpoint that joins to another store shows a tombstone / "unknown" when the target is gone — it never assumes existence and never crashes. There are no hard cross-aggregate foreign keys. +2. **Tombstone — don't hard-delete — anything referenced as a resolution source.** Hard-delete only leaf/secondary data that nothing resolves against. Keep a tombstone for anything other streams or projections point at. +3. **Events are self-contained.** A projection reads the **event**, not a live lookup that can vanish. Either the event carries what the projection needs, or the projection joins at read time (rule 1) — never a hard dependency on a mutable row in another store. +4. **One aggregate = one consistency boundary.** Don't span a single invariant transactionally across an ES aggregate and a flat document and expect referential integrity — across that line it is eventually consistent. What must be valid *together* belongs in the same aggregate. +5. **Don't bake mutable cross-store data into a projection.** A projection should be self-sufficient from its own stream, or join at read time. Baking in another store's mutable field makes the projection silently stale until a rebuild. 
+ +## Worked example — the GDPR erase (this discipline, in production) + +`GdprService.PerformPermanentEraseAsync` is the hybrid under maximum stress (an entity *must* disappear, but events reference it): + +- The user's **event stream** is PII-masked in place (`ApplyEventDataMasking`) then **archived** — kept, hidden from live queries. +- `ApplicationUser` (a flat doc) becomes a **tombstone** (`deleted-{guid}`) — *deliberately not deleted*, so references still resolve (rule 2). +- Seven **streamless secondary docs are hard-deleted** (`UserSession`, `UserSecurityData`, `ExternalClaimsStore`, `StoredPasskeyCredential`, `UserChangeRequest`, `EmailOtpChallenge`, `ExternalIdentityLink`) — nothing resolves against them at read time. +- The `AuthAuditView` projection holds only the pseudonymous `UserId`; at read time it joins to `ApplicationUser` and an erased user surfaces as `deleted-{guid}` (rules 1 + 3). The "external identity linked" audit row survives even though the link doc is gone, because the row was projected from the **event**, not the deleted doc. + +No dangling reference, because the discipline is applied: tombstone what is referenced, hard-delete only leaves, resolve tolerantly, keep events self-sufficient. + +## What we will NOT do + +- **No rebuild of the existing ES aggregates.** The tax is already paid (the machinery exists, the gotchas are documented in `engineering-gotchas/`); ripping it out is high-cost, high-risk, low-ROI. "Was ES the right call?" and "should we change it?" have different answers — the second is *no*. +- **No new event-sourced store just for symmetry.** (The first audit-redesign draft proposed exactly that and it was correctly discarded — see `future-features/logging-audit-redesign.md`.) + +## Consequences + +- Marten supports event streams **and** documents in one store, session, transaction, and Wolverine outbox — so the hybrid is first-class, not bolted-on, and a single feature can even use both atomically. 
This is a reason to *stay* on the current foundation rather than migrate. +- New features get the simpler model by default, so the ES tax stops spreading by inertia. +- The five rules above are the price of the hybrid: they must be checked whenever a new reference crosses a store boundary or a new delete path is added. diff --git a/dev-docs/codeql-triage.md b/dev-docs/codeql-triage.md index d90d4728..7012ce3c 100644 --- a/dev-docs/codeql-triage.md +++ b/dev-docs/codeql-triage.md @@ -293,3 +293,21 @@ a valid reason — each finding gets its own audit. document if the exact invocation matters. - The post-flip `codeql.yml` change that triggered this: commit `ebcdb2c`. + +## Addendum 2026-06-04 — Bucket 3: `cs/exposure-of-sensitive-information` (masked-PII logging) + +The logging/audit-redesign PR (#51) surfaced three new `cs/exposure-of-sensitive-information` alerts (medium). All false positives. **Resolved by dismissal**, not config — see the "why not a config fix" finding below, which is the load-bearing part of this entry (it stops the next maintainer re-investigating). + +- **#42 `PendingAdminInviteService.cs:217` (prod):** `logger.LogInformation("… Email={MaskedEmail}", …, LogPiiMasking.MaskEmail(invite.Email))`. The email *is* masked before logging — but `MaskEmail` keeps the first local-part char + the full domain (deliberate, for ops triage), so CodeQL follows the method body, sees the return derived from the input, and flags it. Dismissed **false positive** (the value is masked per policy). +- **#40/#41 `OtelLogsRedactionTests.cs:122/131` (test):** the OTel-redaction E2E test logs a *synthetic* email/IP/JWT through the real collector and then asserts they are absent from the export — it deliberately handles fake PII to prove redaction works. Dismissed **used in test** (the "Tests" bucket this triage already normalises). 
+ ### Why not a config fix (verified locally with `gh codeql`, 2026-06-04) + +Both "durable" routes were built and tested against a local C# DB before being rejected — record this so it isn't re-attempted: + +1. **Models-as-Data (neutral/barrier/summary) does NOT work for this query.** `ExposureOfPrivateInformationQuery.qll` defines its barrier as `isBarrier(node) { node instanceof Sanitizer }` with `Sanitizer` a hardcoded abstract QL class — it never consults MaD. A `neutralModel` for `LogPiiMasking.MaskEmail`/`MaskUsername` loaded as **"unused"** and changed nothing. So you cannot teach this query a sanitizer via a data-extension pack. +2. **A custom query (thin wrapper adding `MaskEmail`/`MaskUsername` as a `Sanitizer` subclass) works but is operationally worse.** It correctly cleared #42 + the other masked sites locally. But it needs a new rule id (`cs/exposure-of-sensitive-information-modgud`) and excluding the built-in to avoid double-reporting — and code-scanning dismissals are keyed to the rule id, so the **~19 already-dismissed** findings (the local DB shows 26 total; CI shows 3 because 23 are dismissed) would re-appear under the new id. The one-time re-dismissal churn + maintaining a forked security query outweighs the benefit for an advisory FP. + +**Consequence / standing guidance:** a *future* masked-PII log site (`LogPiiMasking.Mask*` whose result is logged) will flag `cs/exposure-of-sensitive-information` again — dismiss it **false positive** with a one-line "masked via LogPiiMasking; query can't model the sanitizer (Bucket 3)". Only revisit the custom-query route if these become frequent enough that re-dismissing each one is more pain than a one-time 19-alert re-dismissal + a forked query. + +If `MaskEmail` is ever changed to leak more (e.g. keep the whole local-part), revisit these dismissals: the point of the false-positive verdict is that the masking is *sufficient*, not that the method name is magic. 
diff --git a/dev-docs/compliance/legitimate-interest-assessment-security-store.md b/dev-docs/compliance/legitimate-interest-assessment-security-store.md new file mode 100644 index 00000000..e4d73de2 --- /dev/null +++ b/dev-docs/compliance/legitimate-interest-assessment-security-store.md @@ -0,0 +1,84 @@ +--- +title: Legitimate-Interest Assessment — streamless security/ops store +description: GDPR Art. 6(1)(f) balancing test for the streamless security/ops audit store (SecurityAuditEntry). Companion to the logging/audit redesign §A.5. +--- + +# Legitimate-Interest Assessment (LIA) — streamless security/ops store + +> **Status:** Drafted 2026-06-03 alongside Phase 3 of the [logging/audit redesign](./../future-features/logging-audit-redesign.md). This is the Art. 6(1)(f) balancing test the design names as a production prerequisite (§A.5). It must be reviewed and signed off by the controller (the deploying operator) before the store processes real end-user data in production; a self-hosting operator adapts the "controller", "retention", and "disclosure" sections to their deployment and privacy policy. + +## 1. What is processed, and where + +The **streamless security/ops store** (`SecurityAuditEntry`, system DB, cross-realm) records security and operational events that have **no aggregate event stream to attach to** — they are not a registered user's personal data on that user's stream. It is the typed successor to the streamless portion of the retired `"Auth:"` log. Two record families: + +- **Security (tenant-relevant):** failed logins against an *unknown/inactive* username, invalid magic-link probes, rejected external/federation logins (allowlist / JIT-disabled / malformed / SAML signature failure), blocked identity-hijack and JIT email-conflict attempts, blocked privilege-escalation, rate-limit hits, DCR registration rejections, bootstrap-invite rejections. 
+- **Operational (platform-relevant):** signing-key / SAML-certificate rotation, SAML metadata refresh, recovery-CLI invocations, realm provisioning / adoption / control-plane transfer, account-lifecycle sweeps, DCR registration / GC / first-use, bootstrap-invite issuance, and the audit-of-the-audit (log cleared / exported). + +**Personal data it may contain:** an *attempted* username or email (an identifier of whoever made the attempt), a source **IP address** (personal data under CJEU *Breyer*, C-582/14), and — for operational actions — the **acting admin's** username. Emails are masked at the call site (`LogPiiMasking.MaskEmail`). It deliberately holds **no** secrets, tokens, invite codes, magic-link URLs, passwords, or request/response bodies. + +This LIA covers **only** this streamless store. The per-realm GDPR-audit (`AuthAuditView`) is a registered user's own personal data on their event stream, processed and erased per subject — it is *not* under legitimate interest and is out of scope here. + +## 2. Why not per-subject erasure (the boundary) + +A record about an **unidentified actor** has no user stream to mask/erase in place. Forcing it into the per-subject erase path is impossible for the unknown-actor case and, for the borderline known-actor case (e.g. a failed attempt whose attempted email later registers), would put an identified subject's security record *outside* the protections of their own erasable stream while pretending otherwise. The design draws the boundary explicitly: stream-backed = erasable in place; **streamless = lawful under legitimate interest, with short retention as the proportionality control** rather than per-subject erasure. This LIA is the documentation of that lawfulness. + +## 3. Purpose test — is there a legitimate interest? + +**Yes.** The purposes are: + +1. 
**Security / abuse detection.** Detecting and responding to credential-stuffing, password-spray, account-takeover attempts, federation misconfiguration probes, and SAML tampering is a textbook legitimate interest, expressly recognised by Recital 49 GDPR ("ensuring network and information security … constitutes a legitimate interest"). A realm-admin needs to *see* attacks against their realm's login surface to respond (lockout policy, IP blocks, alerting). +2. **Operational forensics / accountability.** A tamper-evident-enough record of privileged operational actions (key rotation, recovery-CLI break-glass, control-plane transfer, who cleared the audit) is necessary for incident response and operator accountability. + +The interest is **real and present** (these attacks happen continuously against any public IdP), not speculative. + +## 4. Necessity test — is the processing necessary? + +**Yes, and minimised.** The interest cannot be met without retaining *some* attacker-attributable signal: + +- **Raw IP is necessary, short-term.** Correlating attempts (same IP across many usernames = spray; many IPs against one username = stuffing), feeding manual/automatic IP blocking, and giving a realm-admin actionable "who, from where" all require the **raw** IP, not a hash or geo-only reduction: + - A **hash** defeats the primary use (you cannot block, allowlist, or range-correlate a salted hash; an unsalted hash is trivially reversible for IPv4 and so offers no real minimisation). + - **Geo-only** loses the per-host correlation that distinguishes an attack from noise and cannot drive a block. + - The minimisation applied instead is **time** (§5): the raw IP exists only for a short window, after which it is hard-deleted. +- **Attempted identifier is necessary.** "Failed login for *which* account" is the security signal a realm-admin acts on; without it the row is useless for detecting a targeted takeover. 
Emails are masked; an attempted bare username is retained as-is (it is not, on its own, more than the attacker chose to type). +- **No excess.** No bodies, tokens, secrets, cookies, or passwords are stored. The store records *occurrences and the minimum attribution*, not payloads. + +## 5. Proportionality / balancing test — does it override the data subject's interests? + +**On balance, yes, given the safeguards** — the controls below keep the impact on data subjects low and proportionate to the security benefit. + +**Retention = the proportionality control.** A **fixed, short hard-prune** (default **7 days**, `SecurityAuditPruneJob`) genuinely deletes rows past the window — this is a real deletion, not a hidden archive. The window is deliberately **not per-realm configurable**, so an operator cannot quietly turn a tight security window into an open-ended dossier. (Contrast: the per-realm GDPR-audit *visibility window* is a view bound over kept-with-the-account history, a different concept — see §A.6 of the design.) + +**Access is tightly gated:** + +- Read is gated on the `auth-log:read` permission (seeded onto the realm-admin and the User Manager role); no public-network exposure of the raw store. +- **Scope at read:** a tenant realm-admin sees only their **own realm's tenant-visible** rows; control-plane-only operational rows (`PlatformOnly`) are visible to the control-plane operator only. (Carried forward from PR #50's `ScopeToCallerRealm`, extended with the visibility gate.) +- **Clearing/exporting the log is itself audited** (audit-of-the-audit: `audit.log_cleared` / `audit.log_exported` records the operator + realm + timestamp), so destructive or exfiltrating operator actions leave their own trail. + +**Reasonable expectations.** A person attempting to log in to an identity provider would reasonably expect that failed/anomalous attempts are logged briefly for security — this is standard practice and aligns with Recital 49. 
The processing is not used for any secondary purpose (no profiling, no marketing, no automated decisions with legal effect). + +**Residual risk is low:** the data is minimal, masked where it is an email, access-controlled, realm-scoped, short-lived, and never enriched or sold. + +## 6. Data-subject rights + +- **Erasure (Art. 17).** Streamless records are retained under Art. 6(1)(f) with the short retention window as the safeguard; Art. 17(1)(c)/(3) permits retention of a de-identified-by-time security record. They are **not** swept by a registered user's permanent-erase (verified by test: a streamless row survives the user's erase). **Decision #4 (settled): time-expiry only** — Phase 3 does **not** scan-and-purge the store for a newly-registered user's email. Rationale: the short window expires pre-registration rows quickly; a scan-on-registration would itself require matching the new user's email against the store (more processing, more linkage) for marginal benefit. **This must be disclosed in the privacy policy** ("security-relevant login attempts, including from unregistered visitors, are retained for up to N days for abuse detection and are not linked to later accounts"). +- **Access / objection (Art. 15 / 21).** Pre-registration attempt records are treated as **time-expiring security records**, not as a per-individual file surfaced on an Art-15 request — there is no reliable, privacy-preserving way to authenticate that a requester "owns" an arbitrary attempted identifier/IP, and surfacing them would create a lookup oracle. The controller discloses this stance in the privacy policy. (If a controller chooses to honour such requests, identity verification and the resulting linkage must be assessed separately.) +- **Operational records about admins** (who rotated a key, ran the recovery CLI, cleared the log) are processed for accountability; the acting admin is an operator, not a consumer data subject, and the same short retention applies. + +## 7. 
Safeguards summary + +| Safeguard | Mechanism | +|---|---| +| Data minimisation | typed fields only; no secrets/tokens/bodies; emails masked (`LogPiiMasking.MaskEmail`) | +| Storage limitation | fixed short hard-prune (`SecurityAuditPruneJob`, default 7 days), not per-realm configurable | +| Access control | `auth-log:read`; realm + `PlatformOnly` scope at read; no public exposure | +| Accountability | audit-of-the-audit (`audit.log_cleared` / `audit.log_exported`) | +| Purpose limitation | security + operational forensics only; no profiling / secondary use | +| Boundary integrity | identified users' auth events stay on their erasable stream (the GDPR-audit), not here | + +## 8. Outcome + +Subject to controller sign-off and privacy-policy disclosure (§6), the processing in the streamless security/ops store is assessed as **lawful under Art. 6(1)(f)**: a real security interest, necessary and minimised, with the short fixed retention window plus access-gating and audit-of-the-audit keeping the impact on data subjects proportionate. + +**Controller actions before production:** (1) set/confirm the retention window and state it in the privacy policy; (2) disclose the unregistered-visitor logging + the time-expiry-only stance (§6); (3) record the sign-off (owner + date) below. + +> _Sign-off: ____________________ (controller) — date: ___________ diff --git a/dev-docs/future-features/index.md b/dev-docs/future-features/index.md index 23e524c2..18b64eef 100644 --- a/dev-docs/future-features/index.md +++ b/dev-docs/future-features/index.md @@ -21,6 +21,10 @@ Severity. Detail-Pages unten. [federation-v1-design](./federation-v1-design) — concretizes the agreed v1 model into real code seams (one seam: `ExternalLoginProcessor.ProcessAsync`; authz resolved late at token time): the unified claims-per-source store, the two-layer source filter, "session = lease" (mid-session timer rejected), the new per-provider/per-group flags. 
All design decisions A–G are settled; the doc is the build template. **✅ Shipped (PR #23 spec, PR #24 `0b70b31` broker → session-derived authz + v1.1 token layer).** +⭐ **Logging & Audit Redesign (2026-06-03):** +[logging-audit-redesign](./logging-audit-redesign) +— split today's `AuthLog` (a fragile Serilog "Auth:"-magic-prefix sink that also silently fails GDPR) into two tracks: (A) a typed, **durable** (Wolverine outbox), GDPR-erasable per-realm **audit** trail (event-sourced), and (B) a centralized **operational** logging track (OTel Logs → OTLP + a slim in-app platform live-tail). Grounded in existing conventions (outbox, GdprService masking, Inbox slice, RealmSettings). Has 7 open decisions + a 6-phase plan. Read before any audit/logging work. + ### Audit-Followups (in Severity-Reihenfolge) - Observability — OpenTelemetry / Metrics / Tracing — ✅ shipped (see diff --git a/dev-docs/future-features/logging-audit-redesign.md b/dev-docs/future-features/logging-audit-redesign.md new file mode 100644 index 00000000..3f51b896 --- /dev/null +++ b/dev-docs/future-features/logging-audit-redesign.md @@ -0,0 +1,773 @@ +--- +title: Logging & Audit Redesign +description: Stop treating the "AuthLog" as a store to build. Derive the tenant audit trail as a projection over events we already keep, route the streamless remainder to a short-retention security store under a legitimate-interest basis, and move operational logs onto OTel → an OpenObserve collector that redacts as a guarantee. +--- + +# Logging & Audit Redesign + +> **Status:** Design converged 2026-06-03 (cross-review + code-fact verification + +> adversarial review). **Fully implemented: Phases 0–5 shipped** (see +> [Phasing](#phasing)) — Track A (audit projection + streamless security store) and +> Track B (OTel log export + in-app per-realm error feed), each adversarially +> reviewed before commit. 
+> Supersedes the interim tenant-visibility patch +> ([PR #50](https://github.com/cocoar-dev/modgud/pull/50), which added per-realm +> scoping to today's `AuthLog`) **and** the first draft of this doc (which +> proposed building a *new* event-sourced audit store — now discarded; see +> [Why the first draft was wrong](#why-the-first-draft-was-wrong)). +> +> **Why:** Today's `AuthLog` conflates two different products into one fragile +> Serilog sink, silently fails one of them (GDPR), and *duplicates* telemetry +> that should already live on the event streams. The redesign builds **no new +> store**: the tenant audit trail is a *projection* over events we already keep, +> the operational log moves to a real telemetry backend, and the +> remainder that has no aggregate gets a short-retention security store. + +## The core insight: there is no audit store to build + +The durable, GDPR-correct tenant audit trail **already exists as a side effect of +event-sourcing the user aggregates**. It just isn't projected into a queryable +view — and a handful of events that belong on those streams aren't being appended +yet (the masking rules for them are already registered; the events simply don't +flow). So the redesign is not "build an audit store." It is four moves: + +1. **Finish the event sourcing.** Append the auth-telemetry events that already + have types *and* masking rules but aren't emitted (a login-success marker; a + login-failure record on a *known* user). +2. **Project** the existing user- and config-aggregate streams into one flat, + filterable `AuthAuditView` read model. Durability and GDPR masking are + **inherited** from the source events; the view is **rebuildable**; retention + becomes a **view window**. +3. **Route the streamless remainder** — attempts against *unknown* actors, probes, + rate-limit hits, operational actions — to a separate, short-retention + security/ops store, processed under a legitimate-interest basis. +4. 
**Wire the third OTel signal** (logs) to **OpenObserve** through an **OTel + Collector** whose redaction processor strips PII *as a guarantee*, retiring the + `"Auth:"` magic-prefix sink as the operational-log mechanism. + +### The load-bearing boundary: where an auth event lands + +One rule decides where every auth event goes — and the rule is about **whether a +stream exists to attach the event to**, not about whether the data is "personal." + +- An auth event about a **registered user** is attached to **that user's event + stream**. It is the data subject's personal data, it inherits the per-subject + masking that `GdprService` already applies, and it is therefore **erasable in + place**. +- An auth event about an **actor with no account** — a login attempt on a username + that matches no user, an anonymous probe, a rate-limit hit — has **no stream to + attach to**. It still **may contain personal data** (an attempted email is an + identifier; an IP is personal data under CJEU *Breyer*, C-582/14), so it is + **not** outside GDPR. It lands in the streamless security store and is processed + under **Art. 6(1)(f) legitimate interest** (security / fraud detection), with + **short retention as the proportionality control** rather than per-subject + erasure. + +> **This boundary is a design requirement, not a current fact.** Today the +> codebase is *positioned* to split here but does not enforce it: known-user +> failures don't append `UserLoginFailedEvent` either, so they currently look the +> same as unknown-user failures (Serilog-only, `AccountEndpoints.cs:103`). **Phase +> 1 is the enforcement** — once it lands, any known-user auth event that isn't +> appended to the user stream is a bug. + +## The problem: two concerns fused into one sink + +Today a single mechanism — a Serilog `ILogEventSink` that string-sniffs the +`"Auth:"` message prefix (`AuthLogService.cs:21`) — tries to be both an audit log +and a diagnostic log, and is the only thing resembling either. 
It serves two +**different** audiences with **different** requirements: + +| | **Tenant audit** (today's `AuthLog`) | **Operational logging** | +|---|---|---| +| **Audience** | Tenant admin — "what happened on *my* realm" | Platform team — "is the system healthy, what errored" | +| **Content** | Security/business events (login, rotation, admin action, GDPR) | Errors, diagnostics, stack traces, performance | +| **Properties** | typed, **durable**, per-realm, GDPR-erasable, queryable | high-volume, system-wide, **centralized**, short retention, trace-correlated | +| **Source of truth** | the **event streams we already keep** (derived view) | the telemetry backend (OpenObserve) | + +### The smells (verified against the code) + +1. **Audit is a side effect of logging.** The audit trail is a Serilog sink + keyed on `MessageTemplate.Text.StartsWith("Auth:")` (`AuthLogService.cs:21`), + registered globally (`Program.cs:939`). A log-level override, a typo in the + prefix, or a misconfigured `MinimumLevel` and an audit event **vanishes + silently**. ~33 call sites are coupled to a magic string. +2. **No taxonomy.** `Message` is free text; the only filter ("DCR only") is a + `Message.startsWith('DCR ')` substring match in the SPA. No event type, no + category, no per-event schema. +3. **Not durable.** An unbounded in-memory `Channel` drained by + a `BackgroundService` (`AuthLogService.cs`, `AuthLogPersistenceService`). A + crash between log and persist **loses the event**. The rest of the system + commits through a Wolverine outbox; this path does not. +4. **GDPR is a false promise.** `AuthLogDocument`'s own XML doc claims PII is + "masked at the ArchiveStream layer" — but it is a **flat** Marten doc, not an + event stream, and `GdprService` never touches it (verified: no code masks + `AuthLogDocument`). So a user's `UserName` (and any attempted-username on a + failed login) survives a GDPR erasure until the 7-day prune ages it out. +5. 
**It duplicates telemetry that belongs on the streams.** The masking rules for + `UserLoggedInEvent` and `UserLoginFailedEvent` are *already registered* + (`MartenStoreOptionsExtensions.cs:214-218`), yet password login appends **no + event** (`AccountEndpoints.cs:69-194`), magic-link login appends none + (`MagicLinkEndpoints.cs:195`), external login appends `UserLoggedInEvent` with + `IpAddress: null` (`ExternalLoginProcessor.cs:504,537`), and + `UserLoginFailedEvent` is **never appended anywhere** (`IdentityEvents.cs:46`). + The audit trail's most important content (logins) lives only in the ephemeral + sink for **two of three login paths** (password + magic-link write nothing; + external writes an event but with a null IP). +6. **Retention hardcoded** at 7 days (`AuthLogPersistenceService`), not per-realm, + not configurable — short for a tenant audit/evidence trail. +7. **No centralized operational logging at all.** Observability ships OTel + **metrics** (Prometheus + OTLP) and **traces** (OTLP), but **logs are not in + OpenTelemetry** — there is no `.WithLogs()` (`ObservabilityExtensions.cs:46-103`). + Serilog logs go only to Console + File, per-instance, un-aggregated. The + platform team has **no central place** to see system-wide errors. + +## The two-track architecture + +> **One principle above all:** the tenant audit is *derived from* committed +> events; it never depends on the log pipeline. Today they are fatally fused; the +> redesign makes the crossing one-directional. + +- **Track A — Tenant audit** (tenant-facing): a **projection** over the user- and + config-aggregate event streams we already keep + a separate **streamless + security store** for the records that have no aggregate. Durable and + GDPR-correct by *inheritance* for the stream-backed part, lawful-by-design for + the streamless part. 
+- **Track B — Operational logging** (platform-facing): OTel **Logs** → OTLP → an + **OTel Collector** (redaction processor = the guarantee) → **OpenObserve**, plus + a slim in-app **per-realm** live error feed. Best-effort, bounded, centralized, + and **opt-in / off by default** (§B.0) — the IdP runs fully without it. + +--- + +## Track A — Tenant audit + +### A.1 Source: a projection over streams we already keep + +There are **two families** of source stream. + +**Family 1 — user-aggregate streams (PII, erasable).** Keyed by `userId` +(`StartStream(userId)`), already carrying: created/deleted, password +changed, locked/unlocked, activated/deactivated, profile/email/username changed, +external-identity linked/unlinked (mirror events). All PII-bearing fields already +have masking rules (`MartenStoreOptionsExtensions.cs:188-229`) and are already +masked-then-archived per subject by `GdprService.PerformPermanentEraseAsync` +(`GdprService.cs:318-336`). **Nothing new is needed for these to be +GDPR-correct** — they already are. + +**Family 2 — config-aggregate streams (no PII today).** `OAuthApplicationAggregate`, +OAuth scopes/APIs, login-providers, DCR clients — already event-sourced +(`OAuthApplicationEvents.cs`, appended in `OAuthAdminService.cs`). These events +record **what changed** (e.g. "OAuth client Y's display name was updated"), **not +who changed it** — the acting admin's identity is not persisted in the config +event payload. They therefore hold no personal data and are not in the erase path, +but remain a rebuildable, tenant-relevant config-change source. 
*(If a "who did +this" trail is wanted, that is an actor-attribution concern for the streamless +security store — §A.5 — not a reason to put admin identities into config events.)* + +### A.2 The gap to close (the honest part) + +The login telemetry an audit trail most needs is **not reliably on the streams +today**: + +| Flow | Today | Fix | +|---|---|---| +| Password login | Serilog only, **no event** (`AccountEndpoints.cs:69-194`) | append a `UserLoggedInEvent` marker | +| Magic-link login | **no event** (`MagicLinkEndpoints.cs:195`) | append a `UserLoggedInEvent` marker | +| External / federation login | `UserLoggedInEvent` appended but `IpAddress: null` (`ExternalLoginProcessor.cs:504,537`) | keep emitting; settle the IP question below | +| Known-user login failure | `UserLoginFailedEvent` defined + masking-ruled, **never appended** (`IdentityEvents.cs:46`) | **open decision — see below** | + +This is **cheap on the success side**, because the event *type* and its masking +rule already exist. There is nothing to backfill (these events were never written), +so it is start-forward only. + +**Login success — a minimal marker event (decided).** Mirror the +`UserPasswordChangedEvent(userId, changedByUserId)` precedent +(`EventSourcedUserStore.cs:72`, `UsersEndpoints.cs:247`): a *marker* with no +sensitive payload. Append `UserLoggedInEvent(userId, method)` with **no IP on the +event** but **with the auth `method`** — a non-PII enum (`Password` / `MagicLink` +/ `External` + provider). "No sensitive payload" and "carries the method" don't +conflict: the method is exactly the high-value, non-personal signal an audit +wants — a sudden switch of method, or a first login via a new federation provider, +is a security event in its own right. 
*(The existing `UserLoggedInEvent` is +`(userId, ipAddress)`; adding `method` is a schema evolution — absorbed by tolerant +JSON deserialization, not an `EventVersion` bump (the codebase has none; see §A.7).)* +The "logged in from where / which device" context already lives in the **Sessions +/ device-tracking feature** (`SessionTracker.RecordLoginAsync`, called right after +login at `AccountEndpoints.cs:117`), so putting the IP on the audit event would +*duplicate* PII we already hold elsewhere. The stream answers "when, and by what +method, did this user log in"; Sessions answers "from where". + +> **Clarification (this is not a lost write today).** A successful login already +> writes when `AccessFailedCount > 0`: Identity calls `ResetAccessFailedCountAsync` +> (`EventSourcedUserStore.cs:304-308`) → `UpdateAsync` (`:88-127`) persists the +> reset onto the **`UserSecurityData` document**. Likewise every failed attempt +> increments the counter on that document (`:300`), and lockout *transitions* +> append `UserLockedOutEvent`/`UserUnlockedEvent` (`AppendSecurityChangeEvents`, +> `:362-384`). What is missing is not the *state* (the counter is correct) but the +> *history*: the document tells you the current count, not that a login happened, +> when, or in what sequence. The marker event records the history; it does not fix +> a bug. + +**Login failure on a known user — decided: (b), aggregated on the user's stream.** +A marker per failed attempt has two costs a naive design misses: **stream spam** +(one event per typo) and an **amplification vector** — an attacker spraying wrong +passwords against a victim's account would grow *that victim's* stream and +projection. So "one event per attempt" is out. The real fork was **erasability vs. +a unified brute-force signal**, resolved in favour of erasability: + +- **(b) Throttled / aggregated — CHOSEN.** One `UserLoginFailuresObservedEvent(count, + since)` on the user stream per notable window, not per attempt.
Solves + amplification *equally*, and stays **erasable + boundary-conformant** (it lives on + the subject's stream, so it masks/erases with everything else). Cost: known vs. + unknown failures live in two stores — unioned at read time in the Security view. +- **(a) Streamless — rejected.** Would keep known + unknown failures in one query, + but Alice's account *has* a stream and the streamless record carries her `UserId` + (§A.5), so it is a *deliberate choice not to use her erasable stream* — her attack + records would survive an erasure request until short retention. Defensible under + 6(1)(f)/17(3), but it puts identified-subject data outside the erase path, which + is exactly the boundary this design draws. +- **(c) Lockout-only — rejected.** Keeps today's behaviour (only the lockout + transition is event-sourced); loses the per-user failure history. + +### A.3 The view: `AuthAuditView` + +A Marten **`EventProjection`** folds events from *both* source families — +user-aggregate and config-aggregate types — into one flat read doc, **one row per +event**: `Timestamp`, `Realm`, `Category`, `EventType`, `UserId`, `TargetId`, +`UserName`, `Ip`, `Level`. **It is an `EventProjection`, NOT a Single/MultiStream +aggregation** (verified building Phase 0): an aggregation collapses a stream into a +single snapshot doc per identity (what `UserViewProjection`/`InboxItemProjection` +do), whereas an audit trail is a *list of occurrences*. Each `Create(IEvent)` +method maps one event type to a row, taking metadata from the `IEvent` envelope +(`Id` keys the row, `Timestamp`, `TenantId` → `Realm`, and for user-stream events +`StreamId` → `UserId`). No PII payload is copied in — see [§A.4](#a4-gdpr-masking-inherited-at-source-view-scrubbed-on-erase). +Use a `[DocumentAlias]` for schema stability. Keep `Ip`/`Level` as **first-class +columns** (not a JSON blob — that breaks the grid). `EventType` + `Category` drive +taxonomy-chip filtering in the SPA. 
+ +**Actor identity is resolved at read time, not stored on the row** — and the +*source* of the name is a load-bearing GDPR choice. The view holds only `UserId` +(a pseudonymous GUID); the read endpoint joins it to the **`ApplicationUser`** doc +to show a username. It must be `ApplicationUser`, **not** the `UserView` +projection: `GdprService` masks the `ApplicationUser` doc *in place* on erase +(`UserName → "deleted-{guid}"`, name/email nulled, `GdprService.cs:230-243`), so an +erased user reads as `deleted-{guid}` — de-identified for free — whereas `UserView` +keeps the stale real name until a rebuild and would leak it. Config-stream rows +(no `UserId`) simply show no actor. + +> **Decision (locked): `AuthAuditView` lives PER-REALM in each tenant DB — not the +> system DB.** The user/config aggregate streams already live in the per-realm +> tenant DBs (each realm = a physical Postgres DB). A Marten projection writes +> through the tenant-scoped session factory and can only target its own DB, so the +> view is naturally per-tenant — and the isolation is then **physical**: a Realm-A +> admin *cannot* read Realm-B's audit even if a read-time filter were bypassed or +> misconfigured. This is the GDPR-safe choice and is **not** left open. A cross-realm +> system-DB projection (depending entirely on a `WHERE Realm =` filter, the way +> today's `AuthLogDocument` does) is explicitly rejected for the tenant +> GDPR-audit: a single filter bug would be cross-realm exposure. +> +> **Control-plane platform-wide view = explicit fan-out.** Because the GDPR-audit +> is per-realm, a control-plane operator's cross-realm query loops across the +> active realm sessions, queries each per-tenant `AuthAuditView`, and concatenates +> in app code. **Paginate/cap per realm** so a broad cross-realm query can't become +> an unbounded in-memory concatenation — acceptable precisely *because* this path +> is rare. It keeps realm isolation a DB-hard boundary. 
The cross-realm surface the +> platform team actually needs (brute-force across realms, operational events) is +> the **streamless security store** (§A.5), which *is* a single system-DB query and +> carries PR #50's `ScopeToCallerRealm` forward verbatim. + +### A.4 GDPR: masking inherited at source, view scrubbed on erase + +For the user-aggregate family, erasure is almost entirely inherited — with **one** +new wiring task that applies regardless of projection lifecycle. + +1. **No new masking code.** The events are already registered for masking and + already masked-then-archived per subject by `GdprService` + (`GdprService.cs:318-336`; ordering constraint: mask **before** archive, + `:316`). `ApplyEventDataMasking` rewrites the stored event **bytes in place**. +2. **The one new task: keep the rows, masked — and make them rebuild-durable.** + A GDPR erasure does **not** delete the user; it **masks** them (verified + `GdprService.PerformPermanentEraseAsync`): the `ApplicationUser` doc becomes a + `deleted-{guid}` tombstone (`:230-243`), the event stream's PII is rewritten + in place (`ApplyEventDataMasking`) and the stream is **archived** — *kept*, + hidden from active queries (`:313-336`) — and only the streamless secondary + docs (sessions, security data, links, passkeys, …) are hard-deleted + (`:246-295`). So the audit must **mask-and-keep**, never delete: Art-17(3) lets + a de-identified security record be retained. (Today the only PII column in + `AuthAuditView` is `Ip`; `UserName` is left null and `UserId` is a pseudonymous + GUID that resolves only to the tombstone — so de-identifying a row is just + nulling `Ip`.) Two pieces, no separate store: + - **Durable across rebuilds — `IncludeArchivedEvents = true` on the projection** + (verified API: `JasperFx.Events.Projections.IEventFilterable`, default false). 
+ Because the masked events are *archived, not deleted*, this makes the daemon + **and** a full rebuild include them, so a rebuild regenerates the erased + user's rows **from the masked events** (`Ip` already null). The masked + archived events *are* the durable de-identified record — no second store, no + duplication. (This supersedes the earlier "delete the rows" / "separate + durable store" options: deleting is wrong for an audit trail, and a separate + store would just duplicate the masked events.) + - **Live freshness — refresh the rows in the erase call.** Masking rewrites + event bytes but appends no new event, so the live projection won't re-derive + the already-projected rows on its own (Marten has no cheap targeted + re-projection). So `PerformPermanentEraseAsync`, right after + `ApplyEventDataMasking` + `ArchiveStream`, sets `Ip = null` on the user's + `AuthAuditView` rows (a small load-modify-store on the per-tenant view, keyed + by `UserId`) — **synchronously**, so PII can't linger, and so the live view is + immediately identical to what an archived-inclusive rebuild would produce. + Regression test: after erase the user's rows **survive with `Ip == null`** (not + deleted), and survive a rebuild. +3. **Streamless security store — lawful, not erased-in-place.** Records about + unidentified actors (and operational records) stay out of the per-subject erase + path *because there is no subject stream to attach them to* — **not** because + they aren't personal data (they may well be; see the boundary above). They are + lawful under **Art. 6(1)(f)**, and **short retention is the proportionality + control** for the IP/attempted-email they carry. See §A.5 for the basis and the + known-actor edge case. 
+ +### A.5 The streamless security/ops store + +The records that have **no aggregate** and therefore **no stream**: + +- **Security (tenant-relevant):** login attempts on unknown/inactive users + (`AccountEndpoints.cs:103`), rejected external logins + (`ExternalLoginProcessor.cs:51,77,189,228`), anonymous probes, rate-limit hits. + This is the credential-stuffing signal a realm-admin actually wants. +- **Operational (platform-relevant):** signing-key rotation + (`RealmSettingsEndpoints.cs:86-89`, `SigningKeyJanitorJob.cs:72`), SAML/OIDC + metadata refresh (`SamlMetadataRefreshService.cs:84`), recovery-CLI invocations + (`RecoveryCli.cs`, `"Auth: Recovery …"`), background sweeps, realm provisioning + (`RealmProvisioningService.cs` — note: today logs *without* the `"Auth:"` prefix, + so it never even reaches the current AuthLog — a gap this closes). + +**Shape:** a **flat, typed Marten document** (not event-sourced) in the **system +DB**, cross-realm, scoped-at-read via PR #50's `ScopeToCallerRealm` + +`IsControlPlane` (`AuthLogEndpoints.cs:71-84`) **carried forward unchanged**. Core +fields: `Timestamp`, `Realm`, `EventType` (from the `AuditEvents` taxonomy), +`IpAddress`, `Actor` (UserId if known, attempted username otherwise), `Status`/ +`Reason`. Indexed on `Timestamp`/`Realm`/`EventType`. Realm comes from +`TenantContext.Current` at emit time (the proven `RealmLogEnricher` pattern). +Short hard-retention (the existing 7-day prune becomes a Quartz job over this +store). + +**Routing decision (CONFIRMED): a tenant-visible "Security" view for +realm-admins** — yes. A realm-admin sees brute-force/probe attempts targeting +*their* realm's login surface (events carry `Realm` at emit). Platform-only events +(cross-realm infra, the signing-key janitor) stay control-plane-only. + +**GDPR for streamless records.** They contain personal data (attempted email, IP) +processed under **Art. 6(1)(f)**; the **short retention window is the control**. 
+Two edge cases to settle in Phase 3, with a Legitimate-Interest Assessment as the +deliverable: + +- **Pre-registration → registration.** If `alice@example.com` fails a login and + later registers, her pre-registration failure rows are not on her user stream + and won't be caught by per-subject erasure. The control is that the short + retention window expires those rows quickly; *optionally*, Phase 3 may scan the + streamless store for the new user's email at registration / erase time and purge + matches. Decide and disclose in the privacy policy. +- **Access/objection by an unregistered actor (Art. 15/21).** Decide whether + pre-registration attempt records are surfaced on an Art-15 request (and how + identity is verified) or are treated as time-expiring security records only. + This is a policy choice for the LIA + privacy policy, captured here so it isn't + silently dropped. + +**Deliverable (Phase 3): a Legitimate-Interest Assessment** (purpose = brute-force +/ credential-stuffing detection; necessity of raw IP vs. hash/geo; proportionality +of the retention window; alternatives considered; safeguards = access-gating + +the query audit below). This is a production prerequisite, not part of this doc. + +### A.6 Read surface, retention, and audit-of-the-audit + +- **Tenant GDPR-audit:** query the per-realm `AuthAuditView`, taxonomy-chip filter + (`EventType`/`Category`), columns for `UserName`/`Ip`/`Level`. +- **Retention = a *visibility window*, not a deletion — and say so precisely.** The + view is rebuildable; its source events live with the aggregate for the + aggregate's lifetime (masked on erase, deleted with the account). So a per-realm + "audit retention: 30 days" trims what the *view shows* to 30 days — it does + **not** delete login history older than that; the markers stay on the stream + until the account is deleted. 
That is privacy-sound (minimal no-IP markers, + erased with the account — "kept for the account's lifetime" is a defensible + retention), but it is a **false-promise trap** for a redesign whose original sin + was a GDPR false promise: a tenant-admin reading "Retention: 30 days" will assume + older history is *deleted* — it isn't. So label the setting honestly in UI and + docs as a **visibility / view window**, and state that the login history itself + is tied to account lifetime. Per-realm window via an `AuditSettings` sub-record on + `RealmSettings` (follow `DeletionSettings.cs`: `RetentionDays` + static + `Defaults`), wired through `GET/PATCH /admin/realm-settings`. **Never archive + source streams for retention** — that would corrupt the aggregate. The streamless + store, by contrast, keeps a short **hard** prune that genuinely deletes — that + *is* its GDPR control (intentionally not per-realm configurable, to keep the + legitimate-interest window tight). +- **Audit-of-the-audit (NEW).** Reading, exporting, or **clearing** the audit is + itself an auditable action. Today the clear endpoint (`AuthLogEndpoints.cs:57-61`) + wipes records with no record of who cleared, and `GdprService` export records + only a meter. Route a typed `AuditExportedEvent` / `AuditClearedEvent` (operator + identity + timestamp + realm) to the **streamless security store**, short + retention, realm-tagged at emit. These are forensic records of an operator + action; treat their retention under the same legitimate-interest basis as the + rest of the security store rather than the per-subject erase path. 
+ +### A.7 Taxonomy and explicit scope + +An `AuditEvents.cs` (+ `AuditCategories`) in **`Modgud.Infrastructure.Audit`** +(*Phase-3 correction:* the Phase-0 scaffold put it in `Modgud.Authentication.Audit`, +but the streamless emit call sites live in lower layers — notably +`RealmProvisioningService` in `Modgud.Infrastructure` — so the taxonomy had to move +down to the lowest layer every call site can reach without a magic string; +`AuthAuditView` + the projection stay in `Authentication.Audit` and reference it): +const-string event-type codes + categories, with XML docs declaring +each event's fields **and which are PII** (the PII annotation is what tells you +whether an event belongs on a user stream or in the streamless store). The ~50 +mapped `"Auth:"` sites group into Authentication, Account, Federation, Admin/Realm, +DCR/OAuth, and Security-Ops. Each row carries a `Level` (preserve the existing +Warning/Error/Info mapping). Schema evolution needs **no `EventVersion` +machinery** — the codebase has none; events evolve via tolerant System.Text.Json +deserialization + `MapEventType` aliases (verified Phase 0). + +**Out of scope (stated so it isn't ambiguous):** + +- **2FA / passkey / email-OTP state transitions** are persisted today as document + mutations on `ApplicationUser` only, **not** as appended events + (`MfaEndpoints.cs`, `EmailOtpEndpoints.cs`, `PasskeyEndpoints.cs`). They are + **out of scope** for this redesign and remain non-auditable state changes. + Future work *may* event-source them (`MfaEnabledEvent`/`MfaDisabledEvent`/…) and + fold them into the user-stream audit. +- **Profile change-requests** (the `EmailVerificationPending → + AdminApprovalPending → Approved/Rejected` workflow) are document-only, carry PII + in their payload, and are deleted during permanent erase by `GdprService`. They + are **out of scope** here.
If a tenant-visible "who approved/rejected this" + record is later needed, emit change-request events on the user stream (making + them erasable audit events) rather than mining the documents. + +--- + +## Track B — Platform operational logging + +> **B.0 — Optional by design (opt-in).** All of Track B is toggleable, and the IdP +> — including the entire Track A audit — runs fully with it **off**. Two sub-parts, +> different dependency profiles: +> - **Export** (OTel Logs → collector → OpenObserve) is gated on the **existing** +> `Observability__Otlp__Enabled` flag and is **off by default** (verified: +> `ObservabilitySettings.cs:58`, `configuration.json`). With it off, Serilog stays +> Console + File, no OTLP — **no external infra required**; the collector and +> OpenObserve are needed only once an operator turns export on. +> - **In-app per-realm error feed** (§B.3) is **local-only** (a bounded buffer + the +> existing SignalR hub), no external dependency, so it can run independently of the +> export behind its own flag. +> +> Track A must **never** hard-depend on Track B: disabling operational logging does +> not weaken the audit (Principle 3). This is a requirement, not a nice-to-have — +> many deployments (single-instance, dev) will run with Track B off. + +### B.1 OTel Logs — the missing third signal + +Add `.WithLogs()` to the OpenTelemetry builder (`ObservabilityExtensions.cs:46-103`, +alongside `.WithMetrics`/`.WithTracing` — confirmed absent today), exporting via +OTLP. This needs the OpenTelemetry logging bridge (the `OpenTelemetry.Logs` / +`Serilog.Sinks.OpenTelemetry` package) — Serilog stays the in-process logger, OTel +adds the OTLP export. **Reuse** the existing `ConfigureOtlp` helper + `OtlpSettings` +(same `Observability__Otlp__Enabled` gate, default endpoint `http://localhost:4317` +Grpc, `ObservabilitySettings.cs:52-68`) — no new config section. 
Realm-tag log +records at emit (`RealmLogEnricher`), so even system-tenant background/admin errors +carry `realm=system` and stay filterable, and logs become **correlated with +traces** (same trace-id) in the backend. + +> **Shipped note (deviation from "`.WithLogs()`").** The app wires log export as a +> **Serilog sink** (`Serilog.Sinks.OpenTelemetry`, `WriteTo.OpenTelemetry` in the +> `AddSerilog` block in `Program.cs`), **not** OTel `.WithLogs()`. Reason: `AddSerilog` +> runs with `writeToProviders:false`, so an OTel `ILoggerProvider` (what `.WithLogs()` +> registers) would either receive nothing or — with `writeToProviders:true` — receive +> MEL records *without the Serilog enrichers*, i.e. **without the `Realm` tag** §B.1 +> requires. The Serilog sink emits every property (incl. `Realm`) as a log-record +> attribute and reads `Activity.Current` for trace correlation. The intent is honoured: +> same `Observability__Otlp__Enabled` gate + `OtlpSettings.Endpoint/Protocol`, no new +> config section. (`ConfigureOtlp` itself is metrics/tracing-specific — +> `Action` — so it isn't literally reused; the sink maps the same +> `OtlpSettings` fields to its own options type.) + +### B.2 The backend: OpenObserve behind an OTel Collector (CONFIRMED) + +The destination is **OpenObserve**, reached through an **OTel Collector** sitting +between the app and the backend. + +> **The redaction GUARANTEE lives at the collector, not at the call site.** A +> redaction/transform processor in the collector pipeline strips PII (emails, IPs +> where required, tokens) as a *pipeline guarantee*. Call-site +> `LogPiiMasking.MaskEmail` stays as **belt** — defense in depth, best-effort — +> but it is no longer the thing we rely on for correctness. This is the inversion +> from today, where redaction *is* call-site discipline and therefore leaks the +> moment one site forgets. 
+ +The guarantee is only as good as its configuration, so it is **operationally +conditional**: Phase 4 must (a) version-control the exact PII field set the +processor targets, (b) include an end-to-end test proving emails/IPs/tokens are +redacted before they reach OpenObserve, and (c) document the failure modes (silent +drop, misconfigured processor) with monitoring. Logs are realm-tagged for per-realm +filtering / RBAC inside OpenObserve, and OpenObserve owns operational retention. + +### B.3 Slim in-app live error feed — **per-realm-bounded buffers** + +For the in-app live-tail of errors, **do not repeat today's global-ring mistake.** +The existing `ObservabilityActivityBuffer` is a single global ring with query-time +realm filtering, and a loud realm **provably evicts a quiet realm's events** +before its admin sees them (`ObservabilityActivityBuffer.cs:49-53`, verified). The +new error feed must use **per-realm-bounded buffers** — a small independently-capped +ring *per realm* (keyed by realm) — so a noisy realm cannot starve a quiet realm's +error visibility. Live push via `ObservabilityHub` realm-filtered subscribe +(`ObservabilityHub.cs:32-54` pattern); REST snapshot mirroring +`AdminObservabilityEndpoints.cs`; a parallel error panel in +`AdminObservabilityView.vue`. No retention job — each realm's ring evicts its own +oldest. (Single-instance today; cross-instance is the broader HA/Redis-backplane +question, deliberately out of scope — `ObservabilityActivityBuffer.cs:17-20`.) + +### B.4 Access + +Gate on the existing `observability:read` (operator-scoped). Per-realm admins see +their realm's tagged errors. *(Shipped: per-method SignalR auth on `ObservabilityHub` +is now wired — both stream methods imperatively check `observability:read` against +the caller's realm, since SignalR has no per-method authorisation attribute.
+Control-plane cross-realm aggregation stays deferred — the whole observability +surface, REST included, is realm-scoped today.)* + +--- + +## Shared principles + +1. **No new audit store.** The tenant audit is *derived* from committed events, + not stored a second time. This is the principle that collapsed the first + draft. +2. **Stream-backed = erasable in place; streamless = lawful under legitimate + interest with short retention.** A registered user's auth events attach to + their stream and inherit masking; records about unidentified actors have no + stream, are still treated as personal data, and rely on a documented Art-6(1)(f) + basis + tight retention rather than per-subject erasure. +3. **Separation of pipelines.** Audit = projection over committed events (durable, + exactly-once, GDPR inherited). Operational = OTel Logs → collector → OpenObserve + (best-effort, lossy-by-design, **opt-in / off by default** — §B.0). Crossing is + one-directional; audit never depends on logging, and **disabling Track B never + weakens the audit**. +4. **Redaction guarantee at the collector** for operational logs; **masking + inherited at source** for the tenant audit. Neither relies on per-call-site + discipline for correctness. +5. **Realm attribution at emit time, always** — from `TenantContext.Current`, + because both persistence paths run tenant-less downstream. Background → + `system`. +6. **Isolation: physical for the GDPR-audit projection** (per-realm DB), + **scoped-at-read for the streamless security store** (system DB, PR #50's + `ScopeToCallerRealm` carried forward). +7. **Audit-of-the-audit** — reading/exporting/clearing the audit is itself + auditable. + +## Open decisions (yours to make) + +1. **Known-user login-failure routing** (§A.2) — **DECIDED: (b)**, aggregated + `UserLoginFailuresObservedEvent` on the user's stream (erasable + + boundary-conformant; amplification solved by aggregation; known vs. unknown + failures unioned at read time). 
(a) streamless and (c) lockout-only rejected — + (a) would hold an identified subject's `UserId` outside her erasable stream. +2. **Projection lifecycle** — inline vs async `AuthAuditView`. Either way the + erase-time row refresh (§A.4.2: set `Ip = null`, keep the row) is mandatory, and + `IncludeArchivedEvents = true` makes a rebuild regenerate erased rows from the + masked events. Inline gives instant steady-state freshness, async is + eventually-consistent. *Recommendation: match the `UserViewProjection` lifecycle.* +3. **External-login IP** — now that success is a no-IP marker (§A.2), keep external + login's `UserLoggedInEvent` IP `null` too (consistency, IP via Sessions) or let + federation logins carry it. *Recommendation: null, for consistency.* +4. **Streamless pre-registration PII** (§A.5) — **DECIDED: time-expiry only.** No + scan-and-purge on registration/erase; the short retention window is the control. + Disclosed in the LIA + (operator's) privacy policy. (Phase 3.) +5. **Collector deployment** — sidecar vs shared collector; the redaction processor + ruleset (the PII field set). (Ops decision.) +6. **OpenObserve multi-tenancy** — one stream per realm vs realm-tag + RBAC inside + OpenObserve. +7. **In-app error feed floor** — which severity (ERROR-only vs WARN+) and which + infra namespaces (Marten, Npgsql, Wolverine) feed the per-realm buffers. +8. **Permission naming** — **DECIDED: split.** A new `audit-log:read` gates the + tenant GDPR-audit (`/admin/audit`); `auth-log:read` is kept (not renamed) for the + streamless Security store (`/admin/auth-log`). Registered in the runtime catalog + + the (evolving) per-realm seeder + the seeded User Manager bootstrap role, so + existing realms gain `audit-log:read` on next boot. (Phase 3.) +9. **Migration** — strangler (typed path alongside the legacy sink, retire the + magic-prefix last) vs big-bang. 
*Recommendation: strangler, login telemetry + first (Phase 1), then drain the streamless `"Auth:"` sites.* + +## Phasing + +- **Phase 0 — Catalog + projection scaffold** ✅ *shipped* (no behavior change): + `AuditEvents`/`AuditCategories` taxonomy with PII annotations, the `AuthAuditView` + **`EventProjection`** (one row per event) over the existing user-aggregate + login- + provider streams, registered async, with an integration test proving events project + to flat typed rows (`Modgud.Authentication/Audit/*`, `Modgud.Api.Tests/Audit/`). + *(OAuth application/scope/API config events are the next mechanical addition — same + `Create(IEvent)` pattern.)* +- **Phase 1 — Close the event-sourcing gap** (§A.2) ✅ *shipped*: the + `UserLoggedInEvent` marker (with `method`, IP via Sessions, best-effort so it + never breaks a login) now appends on password + magic-link + external login; + known-user failures emit the aggregated `UserLoginFailuresObservedEvent` + (Open Decision #1 (b)) from `EventSourcedUserStore.AppendSecurityChangeEvents` + when a failure streak resolves (counter `>0 → 0`). This is where the load-bearing + boundary starts being *enforced*. **Known limitation:** streak-resolution + emission rides ASP.NET Identity's failed-count reset, which only the *password* + path performs — a magic-link / external login does not reset the count, so a + streak there resolves on the next password success (or is reflected by lockout). + Aligning all methods is deferred (it touches lockout semantics). **Note:** the + streamless store doesn't exist until Phase 3, so between Phase 1 and Phase 3 the + legacy `AuthLogSink` keeps carrying the streamless-bound records (unknown-user + attempts, operational `"Auth:"` sites) — the strangler retires it only once the + typed store stands up. No record falls on the floor in the interim. 
+- **Phase 2 — Tenant GDPR-audit read surface** ✅ *shipped*: `GET /api/admin/audit` over the + per-realm `AuthAuditView` — a **tenant-scoped session → physical realm isolation** + (no `WHERE Realm =`; a filter bug can't leak cross-realm), `category`/`eventType` + filters, gated on `auth-log:read` at ship time (moved to the dedicated + `audit-log:read` by the Phase-3 permission split — Open Decision #8). Plus the **mask-and-keep erase handling** + (§A.4.2: `IncludeArchivedEvents = true` + null the user's `Ip` in the erase call) — + tested: an erased user's rows **survive de-identified** (`Ip == null`) across a + rebuild. The per-realm **`AuditSettings.VisibilityWindowDays`** window (default 90, + on `RealmSettings`, via `GET/PATCH /admin/realm-settings`) is applied at read — + named a *visibility* window, not "retention", so it can't read as a deletion + promise (§A.6). The **SPA `AuditLogView`** (sidebar `/admin/audit`, category-chip + filter over the grid) is shipped and verified live with Chrome DevTools — a real + admin password login surfaced an `auth.login_succeeded`/`password` row end-to-end + (Phase 1 → projection → endpoint → view), with the actor's **`Benutzer`/User + column resolved at read time from the erasure-masked `ApplicationUser`** (§A.3 — + so erased users de-identify in the displayed audit too; config rows show no + actor). **Pending:** only the control-plane cross-realm fan-out (deferred — + platform-wide is the Phase-3 streamless store). +- **Phase 3 — Streamless security/ops store** (§A.5) ✅ *shipped*: the typed + `SecurityAuditEntry` store (system DB) + `ISecurityAuditLog`/`SecurityAuditWriter` + (bounded best-effort channel; realm captured from `TenantContext.Current` at emit, + with an explicit override for realm-iterating jobs). All streamless `"Auth:"` sites + migrated to typed `Record(...)` — incl. the prefix-less realm-provisioning logs and + (caught by adversarial review) the SAML login-flow rejections + a new + `security.saml_signature_rejected` tamper code, and the DCR `ops.dcr_client_first_used`.
+ The `"Auth:"`-prefix convention is gone from every call site. Read surface carried + forward (`/api/admin/auth-log` → `SecurityAuditEntry`) with #50's `ScopeToCallerRealm` + + a `PlatformOnly` visibility split; the SPA `AuthLogView` repurposed as the tenant + **Security** view (category chips). Audit-of-the-audit (`audit.log_cleared`); a fixed + short-retention Quartz prune (`SecurityAuditPruneJob`); the **permission split** + (`audit-log:read` for the GDPR-audit, `auth-log:read` for the Security store, Open + Decision #8); and the [Legitimate-Interest Assessment](../compliance/legitimate-interest-assessment-security-store.md) + (Open Decision #4 = time-expiry only). **Deleted** `AuthLogSink`, + `AuthLogPersistenceService`, `AuthLogDocument`, and the orphaned `DcrAuditEvents` + vocabulary. (`RealmLogEnricher` kept — it tags operational logs for Phase 4.) + DevTools-verified end-to-end (failed login → Security row; clear → `audit.log_cleared`). +- **Phase 4 — OTel Logs → collector → OpenObserve** (§B.1–B.2) ✅ *shipped*: log + export wired as a **Serilog OTLP sink** (`Serilog.Sinks.OpenTelemetry`, not + `.WithLogs()` — see the §B.1 shipped note), behind the existing + `Observability__Otlp__Enabled` gate (off by default; §B.0), realm-tagged + (`RealmLogEnricher`) + trace-correlated. The redaction **guarantee** is a + versioned transform/OTTL processor (`redaction-ruleset: v2`) in + `docker/otel-collector/otel-collector-config.yaml` that strips emails / JWTs / + Bearer-Basic creds / IPv4 / IPv6 / usernames from the log body **and** top-level + attribute values (resource attributes left intact), proven by an **end-to-end + test against a real collector** (`Modgud.Api.Tests/Observability/OtelLogsRedactionTests`) + plus anti-drift checks pinning the test ruleset to the shipped one **and** that the + shipped pipeline wires the processor before export. 
Failure modes + realm=system + fallback + a local `docker-compose.observability.yml` (Collector + OpenObserve) + documented in `docs/operate/observability.md`; verified end-to-end against the + real stack (PII scrubbed, realm-filterable in OpenObserve). A deployment without + OpenObserve/collector is unaffected (gate off → Serilog stays Console + File). + `LogPiiMasking.MaskEmail` kept as belt. The block went v1→v2 after an adversarial + diff review (usernames have no value shape: the `UserName`/`Actor` attributes are + dropped and the `User=` body form masked; IPv6 leading-`::` added). + **Source-side belt (shipped alongside):** the operational log sites no longer emit + raw login identifiers — an identified user is logged as `user.Id` (a GUID that + erasure tombstones), and an unidentified actor's attempted handle is masked via the + new `LogPiiMasking.MaskUsername` (email-aware). This covers ~27 sites across the + Account / Profile / Admin-grace / passkey / magic-link / 2FA-enforcement / external- + unlink / bootstrap surfaces, **and** the streamless **security store** writes (the + unknown-user failed-login `Actor`/`Message`, the bootstrap-invite, and the six + Recovery-CLI break-glass emits — that DB sink does not pass through the collector, + so masking there is the only control; CLI console output stays human-readable). + Collector v2's `User=`/`UserName` rules remain as belt for any future call site. +- **Phase 5 — In-app per-realm error feed** (§B.3) ✅ *shipped*: a new + `RealmErrorBuffer` (`Modgud.Infrastructure.Observability`) keeps an + independently-capped ring **per realm** (keyed by realm slug) — **not** the global + ring of `ObservabilityActivityBuffer`, so a noisy realm can never evict a quiet + realm's errors (the §B.3 guarantee, unit-tested). 
A Serilog `ErrorFeedSink` + (`Modgud.Authentication/AuthLog`, beside `RealmLogEnricher` since Infrastructure has + no Serilog ref) captures qualifying events into it, reading the realm from the + enricher-stamped `Realm` property. `ObservabilityHub.LogsSubscribe()` streams a + realm's entries; `GET /api/admin/observability/errors` is the REST snapshot; an + error panel was added to `AdminObservabilityView.vue` (i18n `errorFeed*`). Local-only + behind its own `Observability__ErrorFeed__Enabled` flag (default on; independent of + the OTLP export gate — §B.0), with configurable `MinimumLevel`/`SourcePrefix`/ + `CapacityPerRealm`. **Open Decision #7 answered: Error+ from `Modgud.*` loggers only** + (framework failures surface in Console/File/OpenObserve, not the in-app panel) — + configurable, and the settings docs note the effective floor is `max(this, Serilog's + global+namespace pipeline floors)`. The redaction collector does **not** cover this + in-app path — the call-site PII belt + per-realm read scoping are the controls + (mirrors the streamless security store). **Carried-forward hardening shipped:** + per-method `observability:read` auth on the hub — SignalR has no per-method + authorisation attribute, so both stream methods check it imperatively via + `IPermissionService`. **⭐ Load-bearing gotcha (adversarial-review catch):** + `TenantContext.Current` is **not** set during SignalR hub dispatch (it unwinds + after the negotiate request → falls back to `system`), so the realm filter AND the + permission query must read the caller's realm from `HttpContext.Items` (like the + sibling hubs), and the permission check must run inside `TenantContext.Enter(realm)` + on a fresh DI scope so the tenant-scoped `IQuerySession` binds to the right realm DB. 
+ The original draft used `TenantContext.Current` for both — which would have wrongly + denied non-system realm-admins and leaked system-realm errors to every tenant admin + (masked in single-realm dev); the fix also corrected the same latent bug in the + pre-existing metrics `Subscribe()`. DevTools-verified end-to-end (panel renders, + `/errors` 200, SignalR streams subscribe clean, a live login event pushed to the feed + via the shared async-`Observable.Create` helper). + +## What gets deleted at the end + +`AuthLogSink`, `AuthLogPersistenceService` (the `Channel` + `BackgroundService`), +the `"Auth:"` magic-prefix convention at all call sites, the hardcoded 7-day +constant (becomes config for the audit window; stays a fixed short prune for the +streamless store), and `AuthLogDocument`-as-the-audit-store (its personal-data +portion becomes the `AuthAuditView` projection; its streamless portion becomes the +typed security store). The `AuthLogEndpoints` HTTP surface is **carried forward +unchanged** — the new stores back the same API, so the SPA (`AuthLogView.vue`) +keeps working. **`RealmLogEnricher` stays** — it is still how operational OTel logs +*and* the streamless security store get their realm tag at emit time. + +## Why the first draft was wrong + +The first draft proposed building a **new** event-sourced audit *stream* with its +own Wolverine outbox path, per-event-type masking-rule registration, an +anonymization-past-retention scheme, an Art-17(3)-per-category exemption section, +and tamper-evidence. The cross-review + the code-fact verification collapsed all of +it: + +- **The audit stream was redundant** — the auth history is *already* event-sourced + on the user aggregates (verified: 12 event types appended to user streams), so a + parallel audit stream would duplicate it. The fix is to *project* and to *finish + appending the few missing events*, not to build a second store. 
+- **The masking machinery already exists** — masking rules are registered + (`MartenStoreOptionsExtensions.cs:188-229`) and `GdprService` already masks + + archives per subject (`GdprService.cs:318-336`). The genuinely new GDPR task is + small and different: scrubbing the *projected view* on erase (§A.4.2). +- **The heavy GDPR sections became moot** — there is no keep-forever store, so + "anonymize past retention" and "Art-17(3) per category" had nothing to apply to. +- **Tamper-evidence is out of scope** — the tenant audit is time-bounded and + DB-trusted, not a forever-forensic ledger. "We trust the DB." + +What survived from the draft: the typed event catalog, the tenant-facing scoping, +and the audience split. What verification + adversarial review *added or +corrected*: the login-telemetry gap (§A.2) and the marker-event choice; the +correct GDPR framing (streamless records **are** personal data, lawful under +legitimate interest — not "outside GDPR"); the corrected Marten masking semantics +(masking rewrites bytes at rest, but the *projected view* must be explicitly +scrubbed on erase — §A.4.2); the locked per-tenant-DB placement; that +`AuthAuditView` reads from *multiple* stream families (later refined in Phase 0 to +an `EventProjection`, not an aggregation); and the confirmation that today's live +buffer is a global ring that starves quiet realms (§B.3). + +A second cross-review round then sharpened the refinements: the failure-routing +fork is **erasability vs. 
a unified brute-force signal** (not spam-vs-simple), +resolved to **(b)** — aggregated on the user's stream, keeping it erasable (Open +Decision #1, the one call made before Phase 1); the success marker carries the +non-PII auth `method` (§A.2); erasure **masks-and-keeps** the audit rather than +deleting — a GDPR-erased user is *masked, not deleted*, so `IncludeArchivedEvents` +makes the masked archived events the durable, rebuild-safe de-identified record +(§A.4.2); and "retention" is named a **visibility window** so it can't become a +softer reprise of the very false promise this redesign removes (§A.6). + +Then **building Phase 0** corrected one more thing the design (and both reviews) +got wrong: `AuthAuditView` is an **`EventProjection`** — one row per event — not a +Single/MultiStream aggregation. An aggregation collapses a stream into one snapshot +doc per identity; an audit log is a *list of occurrences*. The build is the +arbiter: the framing only revealed itself as wrong once the code had to compile +against the real Marten API. diff --git a/docker/docker-compose.observability.yml b/docker/docker-compose.observability.yml new file mode 100644 index 00000000..fc6becde --- /dev/null +++ b/docker/docker-compose.observability.yml @@ -0,0 +1,49 @@ +# Local observability stack for trying out Phase 4 log export end-to-end: +# Modgud (OTLP) -> OTel Collector (redaction) -> OpenObserve. +# +# This is a DEV CONVENIENCE, not a production deployment. The credentials below +# are throwaway; the production collector topology + OpenObserve org/RBAC layout +# are an ops decision. See docs/operate/observability.md. 
+# +# docker compose -f docker/docker-compose.observability.yml up -d +# +# Then run the API with export on: +# Observability__Otlp__Enabled=true +# Observability__Otlp__Endpoint=http://127.0.0.1:4317 # 127.0.0.1, not localhost (see observability.md) +# +# OpenObserve UI: http://localhost:5080 (root@example.com / Complexpass#123) + +services: + openobserve: + # Pin a concrete tag for reproducibility in your own setup. + image: public.ecr.aws/zinclabs/openobserve:latest + environment: + ZO_ROOT_USER_EMAIL: root@example.com + ZO_ROOT_USER_PASSWORD: Complexpass#123 + ports: + - "127.0.0.1:5080:5080" + volumes: + - openobserve-data:/data + restart: unless-stopped + + otel-collector: + image: otel/opentelemetry-collector-contrib:0.153.0 + command: ["--config=/etc/otelcol-contrib/config.yaml"] + volumes: + - ./otel-collector/otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml:ro + environment: + # OpenObserve's OTLP/HTTP logs ingest for the "default" org, with the root + # user's Basic-auth header (base64 of "<email>:<password>" above). + OPENOBSERVE_LOGS_ENDPOINT: http://openobserve:5080/api/default/v1/logs + OPENOBSERVE_AUTHORIZATION: "Basic cm9vdEBleGFtcGxlLmNvbTpDb21wbGV4cGFzcyMxMjM=" + ports: + # Loopback-only: the OTLP receiver is unauthenticated and this stack is a + # local/dev convenience, not a deployable topology. + - "127.0.0.1:4317:4317" # OTLP gRPC + - "127.0.0.1:4318:4318" # OTLP HTTP + depends_on: + - openobserve + restart: unless-stopped + +volumes: + openobserve-data: diff --git a/docker/otel-collector/otel-collector-config.yaml b/docker/otel-collector/otel-collector-config.yaml new file mode 100644 index 00000000..272ff6ae --- /dev/null +++ b/docker/otel-collector/otel-collector-config.yaml @@ -0,0 +1,121 @@ +# Modgud — OpenTelemetry Collector configuration (Phase 4, Track B). +# +# Sits between the IdP and OpenObserve. 
Its redaction processor is the PII +# GUARANTEE for operational logs (logging-audit-redesign.md §B.2): emails, IPs, +# tokens and usernames are stripped here, in the pipeline, regardless of whether +# a call site remembered to mask. Call-site LogPiiMasking.MaskEmail stays as a +# belt — defense in depth, best-effort — but it is no longer the control. +# +# Only engaged when the IdP runs with Observability__Otlp__Enabled=true. With +# the gate off the app never exports, so this collector is not in the path and +# no external infra is required (§B.0). +# +# Requires the *-contrib* collector distribution (the transform processor with +# OTTL ships in contrib, not the core collector). +# +# See dev-docs/future-features/logging-audit-redesign.md §B.1-B.2 and +# docs/operate/observability.md. + +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + # >>> redaction-ruleset: v2 — BUMP THE VERSION IF YOU EDIT THIS BLOCK <<< + # + # The exact PII field set the guarantee targets. Versioned because the + # guarantee is only as good as this block (§B.2): the end-to-end test + # (Modgud.Api.Tests/Observability/OtelLogsRedactionTests) pins it against a + # real collector, and this version string is what an operator audits. The test + # also asserts this block matches the test collector's copy verbatim AND that + # the shipped pipeline actually wires this processor before export, so the rules + # under test never drift from the ones that ship. + # + # Scope: the log BODY (rendered message) and every top-level string ATTRIBUTE + # value (Serilog properties land here). Resource attributes (service.version, + # ...) are deliberately NOT touched — e.g. service.version "1.0.0.0" must not be + # mistaken for an IPv4 address. 
+ # + # Known limits (must be masked at the call site if they can carry PII): + # - replace_all_patterns(value) does NOT recurse into nested Map/Slice + # attribute values, so destructured objects ({@...}) / logged collections + # are out of scope here — mask those at the source. + # - a bare secret with no adjacent bearer/basic keyword has no value shape; + # it is caught in the body only via the scheme, not as an attribute value. + # - usernames have no value shape: the known UserName/Actor carrier attributes + # are dropped and the "User=" body form is masked, but a login identifier + # inlined into other free-text prose relies on the call site not logging it + # raw (log user.Id instead). + # - error_mode: ignore -> a per-statement runtime error skips THAT statement + # (best-effort, Track B). The statements are simple guarded regex replaces, + # so the leak surface is narrow; do NOT switch to propagate without analysis + # (propagate drops the whole payload on any error). + # + # v2 targets (regexes are single-quoted YAML so backslashes are literal): + # - email addresses -> [REDACTED_EMAIL] + # - JWTs (eyJ.x.y) -> [REDACTED_TOKEN] + # - Bearer/Basic creds -> [REDACTED_AUTHORIZATION] + # - IPv4 addresses -> [REDACTED_IP] + # - IPv6 addresses -> [REDACTED_IP] (full, ::-compressed, and leading-::) + # - usernames -> UserName/Actor attrs dropped; "User=" body masked + transform/redaction: + error_mode: ignore + log_statements: + - context: log + statements: + # --- log body (rendered message), only when it is a string --- + - 'replace_pattern(log.body, "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}", "[REDACTED_EMAIL]") where IsString(log.body)' + - 'replace_pattern(log.body, "eyJ[A-Za-z0-9_-]+\\.[A-Za-z0-9_-]+\\.[A-Za-z0-9_-]+", "[REDACTED_TOKEN]") where IsString(log.body)' + - 'replace_pattern(log.body, "(?i)(?:bearer|basic)\\s+[A-Za-z0-9._~+/-]+=*", "[REDACTED_AUTHORIZATION]") where IsString(log.body)' + - 'replace_pattern(log.body, 
"\\b(?:[0-9]{1,3}\\.){3}[0-9]{1,3}\\b", "[REDACTED_IP]") where IsString(log.body)' + - 'replace_pattern(log.body, "(?:[0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}", "[REDACTED_IP]") where IsString(log.body)' + - 'replace_pattern(log.body, "(?:[0-9A-Fa-f]{1,4}:){1,7}:[0-9A-Fa-f]{0,4}", "[REDACTED_IP]") where IsString(log.body)' + - 'replace_pattern(log.body, "::[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*", "[REDACTED_IP]") where IsString(log.body)' + - 'replace_pattern(log.body, "(?i)\\bUser=[^\\s,;]+", "User=[REDACTED_USER]") where IsString(log.body)' + # --- all top-level string attribute values (Serilog properties) --- + - 'replace_all_patterns(log.attributes, "value", "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}", "[REDACTED_EMAIL]")' + - 'replace_all_patterns(log.attributes, "value", "eyJ[A-Za-z0-9_-]+\\.[A-Za-z0-9_-]+\\.[A-Za-z0-9_-]+", "[REDACTED_TOKEN]")' + - 'replace_all_patterns(log.attributes, "value", "(?i)(?:bearer|basic)\\s+[A-Za-z0-9._~+/-]+=*", "[REDACTED_AUTHORIZATION]")' + - 'replace_all_patterns(log.attributes, "value", "\\b(?:[0-9]{1,3}\\.){3}[0-9]{1,3}\\b", "[REDACTED_IP]")' + - 'replace_all_patterns(log.attributes, "value", "(?:[0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}", "[REDACTED_IP]")' + - 'replace_all_patterns(log.attributes, "value", "(?:[0-9A-Fa-f]{1,4}:){1,7}:[0-9A-Fa-f]{0,4}", "[REDACTED_IP]")' + - 'replace_all_patterns(log.attributes, "value", "::[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*", "[REDACTED_IP]")' + # usernames have no value shape -> drop the known carrier attributes. + - 'delete_key(log.attributes, "UserName")' + - 'delete_key(log.attributes, "Actor")' + # >>> end redaction-ruleset: v2 <<< + + # Bound batch sizes so a burst can't OOM the collector. + batch: + timeout: 5s + send_batch_size: 1024 + send_batch_max_size: 2048 + +exporters: + # Real destination — OpenObserve OTLP/HTTP ingest. Endpoint + credentials come + # from the environment so no secret is committed. 
An unset OPENOBSERVE_* expands + # to an empty value: the collector still starts (and still redacts), but export + # then fails and records are dropped — set both env vars for export to work. + # + # Kept as "otlphttp" (not the newer "otlp_http" alias) for compatibility with + # older collector builds; recent contrib builds log a harmless deprecation note. + otlphttp/openobserve: + logs_endpoint: ${env:OPENOBSERVE_LOGS_ENDPOINT} + headers: + Authorization: ${env:OPENOBSERVE_AUTHORIZATION} + +service: + pipelines: + logs: + receivers: [otlp] + # transform/redaction BEFORE batch: every record passes through redaction + # before it can reach an exporter. + processors: [transform/redaction, batch] + exporters: [otlphttp/openobserve] + telemetry: + logs: + level: info diff --git a/docs/operate/observability.md b/docs/operate/observability.md index 6fc9a3fb..c26afabb 100644 --- a/docs/operate/observability.md +++ b/docs/operate/observability.md @@ -13,7 +13,8 @@ Permissions for the in-app live view: `observability:read`. The `realm:admin` by | Surface | Path | Auth | | --- | --- | --- | | Prometheus scrape | `/metrics` (default) | Static **bearer token** — set via `Observability__Prometheus__BearerToken`. Mismatch returns 404 (not 401) so the endpoint's existence stays unconfirmed. Constant-time compare. | -| OTLP push (metrics + traces) | configurable endpoint (default `http://localhost:4317`) | Whatever the collector requires. Off by default; turn on when you actually have a collector (Tempo, Honeycomb, …). | +| OTLP push (metrics + traces) | configurable endpoint (default `http://127.0.0.1:4317`) | Whatever the collector requires. Off by default; turn on when you actually have a collector (Tempo, Honeycomb, …). | +| OTLP **log** export | same OTLP endpoint | Off by default — **same** `Observability__Otlp__Enabled` gate. Logs go through an OTel Collector whose redaction processor strips PII before OpenObserve. See [Logs — export & redaction](#logs-export-redaction). 
| | In-app live view | `/operate/observability` (Admin SPA) | Cookie auth + `observability:read`. Realm-scoped — each admin sees only their own realm. | | REST snapshot | `GET /api/admin/observability/snapshot?windowMinutes=15` | Same as in-app view. Returns event-type counts, login outcome breakdown, per-minute sparkline. | | REST activity feed | `GET /api/admin/observability/activity?limit=50` | Same. Most-recent first, last 60 min, capped at 200. | @@ -33,13 +34,21 @@ Permissions for the in-app live view: `observability:read`. The `realm:admin` by "BearerToken": "" // REQUIRED outside Development; empty = boot fails }, "Otlp": { - "Enabled": false, // default off - "Endpoint": "http://localhost:4317", // gRPC by default + "Enabled": false, // default off — gates metrics, traces AND logs + "Endpoint": "http://127.0.0.1:4317", // gRPC by default (127.0.0.1, not localhost — see note) "Protocol": "Grpc" // or "HttpProtobuf" } } ``` +::: tip One gate for all three signals +`Otlp.Enabled` turns on metrics, traces **and** log export together — there is no separate logs flag by design. With it off, Serilog stays Console + File and nothing leaves the box; no collector / OpenObserve is required. Use a bare base `host:port` endpoint for either protocol — the log sink derives the per-signal path itself (and trims a `/v1/logs` suffix if you add one). +::: + +::: warning Plaintext / local collectors +Against a **plaintext `http://`** collector the metrics/traces exporters speak HTTP/2 cleartext (h2c), which the app enables automatically for `http://` endpoints (`Http2UnencryptedSupport`). Two gotchas for a **local** collector: prefer **`127.0.0.1`** over `localhost` (a `localhost` → IPv6 `::1` resolution can hang the exporter against an IPv4-only Docker port map until the 10 s export timeout), and remember the export is best-effort — a wrong endpoint drops telemetry silently. 
A production collector should use **TLS (`https://`)**, which negotiates HTTP/2 natively and needs none of this. +::: + ::: tip Set the bearer in env, not in the JSON The committed `configuration.json` ships with an empty `BearerToken` on purpose — so secrets don't land in source control. Production deployments must set `Observability__Prometheus__BearerToken=` in the container's environment. ::: @@ -112,3 +121,43 @@ Each realm-admin sees only their own realm. The cross-realm aggregate ("global-o When `Otlp.Enabled = true`, OpenIddict-token-issuance, ASP.NET request handling, and HTTP-client outbound calls each emit spans with the `service.name` resource attribute. Trace context propagates standard W3C `traceparent` headers, so spans from your downstream APIs (resource servers, MCP servers) reconnect to the auth-server span automatically. `SamplingRatio` controls how much survives. Default 1.0 is fine for dev; production with traffic should drop it to keep trace volume sane (0.1 is a reasonable starting point). + +## Logs — export & redaction {#logs-export-redaction} + +Logs are the third OTel signal. Serilog stays the in-process logger (Console + File); when `Otlp.Enabled = true` an OTLP sink **also** ships every log record to the OTLP endpoint. Records are **realm-tagged** (the `Realm` property from the realm enricher, `system` for background work) and **trace-correlated** (the active `trace_id`/`span_id` ride along), so a log line in the backend links straight to its request span and is filterable per realm. + +The destination is **[OpenObserve](https://openobserve.ai/)**, reached through an **OpenTelemetry Collector** that sits between the app and the backend. + +::: danger The redaction guarantee lives at the collector +PII (emails, JWTs, `Bearer`/`Basic` credentials, IPv4/IPv6 addresses, and usernames) is stripped by a **transform/OTTL processor in the collector**, not by the app. 
This is deliberate: it is a *pipeline guarantee* that holds even if a call site forgets to mask. The app-side `LogPiiMasking.MaskEmail` stays as a **belt** (defense in depth) but is no longer the thing correctness depends on. + +The processor only redacts the log **body** and top-level string **attribute values** — resource attributes (`service.version`, …) are left alone so e.g. a version `1.0.0.0` isn't mistaken for an IP. The exact field set is **versioned** (`redaction-ruleset: v2`) in [`docker/otel-collector/otel-collector-config.yaml`](https://github.com/cocoar-dev/modgud/blob/develop/docker/otel-collector/otel-collector-config.yaml) and pinned by an end-to-end test (`OtelLogsRedactionTests`) that runs a real collector and asserts PII is gone before export. **If you fork the ruleset, bump the version and re-run that test.** + +Two limits worth knowing, both because the targeted values have no machine-recognisable shape: a **username inlined into free-text prose** other than the `User=` form, and a **nested/destructured (`{@…}`) attribute value**, are out of the collector's reach — log `user.Id` (a GUID) instead of the login identifier, and don't destructure objects that may carry PII. The username **attribute** (`UserName`/`Actor`) and the `User=` body form *are* covered. +::: + +### Failure modes + +The export is **best-effort and lossy by design** (Track B). It must never be load-bearing — the tenant audit (`/admin/audit`, `/admin/auth-log`) is a separate, durable pipeline and is unaffected whether export is on or off. + +| Situation | What happens | What to do | +| --- | --- | --- | +| Gate off (default) | No export. Serilog Console + File only. No collector needed. | Nothing — this is the safe default. | +| Gate on, collector unreachable | The OTLP sink retries with backoff and drops on overflow. **The app keeps running**; local Console + File still have everything. | Alert on the collector being down; logs are not lost locally. 
| +| Gate on, collector up but **redaction processor removed/misconfigured** | Logs reach OpenObserve **unredacted** — a silent PII leak. | This is the one to guard. Run the **shipped** config; treat the ruleset version as an audited artifact; keep the e2e redaction test green in CI; monitor collector pipeline health. | +| Gate on, `OPENOBSERVE_*` env unset | An unset value expands to empty: the collector still starts **and still redacts**, but export then fails and records are dropped (app + local Console/File unaffected). | Set `OPENOBSERVE_LOGS_ENDPOINT` + `OPENOBSERVE_AUTHORIZATION`; smoke-check that records land. | +| Background / startup logs | Carry `realm=system` (no tenant context yet). | Expected — `system` is the infrastructure catch-all, not a tenant. | + +### Local stack (for trying it out) + +[`docker/docker-compose.observability.yml`](https://github.com/cocoar-dev/modgud/blob/develop/docker/docker-compose.observability.yml) brings up the Collector + OpenObserve so you can watch redacted logs land: + +```bash +docker compose -f docker/docker-compose.observability.yml up -d +# then run the API with export on, pointed at the collector: +# Observability__Otlp__Enabled=true +# Observability__Otlp__Endpoint=http://127.0.0.1:4317 +# OpenObserve UI: http://localhost:5080 (dev creds are in the compose file) +``` + +The collector deployment topology in production (sidecar vs shared, the OpenObserve org/RBAC layout, retention) is an ops decision — the shipped collector config is the redaction contract, not a deployment prescription. 
diff --git a/src/dotnet/Directory.Packages.props b/src/dotnet/Directory.Packages.props index e86032ee..a3d3138a 100644 --- a/src/dotnet/Directory.Packages.props +++ b/src/dotnet/Directory.Packages.props @@ -68,6 +68,7 @@ + @@ -88,6 +89,12 @@ + + diff --git a/src/dotnet/Modgud.Api.Tests/Audit/AuditEndpointTests.cs b/src/dotnet/Modgud.Api.Tests/Audit/AuditEndpointTests.cs new file mode 100644 index 00000000..66effae3 --- /dev/null +++ b/src/dotnet/Modgud.Api.Tests/Audit/AuditEndpointTests.cs @@ -0,0 +1,96 @@ +using System.Net.Http.Json; +using Marten; +using Microsoft.Extensions.DependencyInjection; +using Modgud.Api.Tests.Infrastructure; +using Modgud.Authentication.Audit; +using Modgud.Authentication.Events; +using Modgud.Infrastructure.Audit; + +namespace Modgud.Api.Tests.Audit; + +/// +/// Integration test for GET /api/admin/audit — the tenant GDPR-audit read +/// surface over the per-realm . Verifies it serves the +/// caller-realm rows (the authed client is a realm-admin) and honours the category +/// filter. Realm isolation itself is physical (per-tenant DB), so it isn't re-tested +/// here. +/// +[Collection(IntegrationTestCollection.Name)] +public class AuditEndpointTests : IntegrationTestBase +{ + public AuditEndpointTests(SharedPostgresFixture fixture) : base(fixture) { } + + private sealed record AuditRowDto(string EventType, string Category, string? Ip, string? User); + + [Fact] + public async Task Get_returns_realm_audit_rows_and_honours_category_filter() + { + var ct = TestContext.Current.CancellationToken; + + // Seed a login (authentication) + a password change (account) on a user stream. 
+ var user = await Factory.CreateTestUserWithIdentityAsync("Audit", "Endpoint", "ae", "audit-ep@acme.com"); + using (var scope = Factory.Services.CreateScope()) + { + var session = scope.ServiceProvider.GetRequiredService(); + session.Events.Append(user.Id, new UserLoggedInEvent(user.Id, "203.0.113.1", "password")); + session.Events.Append(user.Id, new UserPasswordChangedEvent(user.Id, null)); + await session.SaveChangesAsync(ct); + } + + // Materialize the async projection (no live daemon in tests). + var store = Factory.Services.GetRequiredService(); + using (var daemon = await store.BuildProjectionDaemonAsync("system")) + await daemon.RebuildProjectionAsync(TimeSpan.FromMinutes(2), ct); + + // Unfiltered → includes our login + password-change rows. + var all = await Client.GetFromJsonAsync>("/api/admin/audit?limit=500", JsonOptions, ct); + Assert.NotNull(all); + Assert.Contains(all!, r => r.EventType == AuditEvents.LoginSucceeded); + Assert.Contains(all!, r => r.EventType == AuditEvents.AccountPasswordChanged); + // the actor's identity is resolved at read time (joined from ApplicationUser) + Assert.Contains(all!, r => !string.IsNullOrEmpty(r.User)); + + // Category filter narrows to authentication only. + var auth = await Client.GetFromJsonAsync>( + $"/api/admin/audit?category={AuditCategories.Authentication}", JsonOptions, ct); + Assert.NotNull(auth); + Assert.NotEmpty(auth!); + Assert.All(auth!, r => Assert.Equal(AuditCategories.Authentication, r.Category)); + } + + [Fact] + public async Task Get_hides_rows_older_than_the_visibility_window() + { + var ct = TestContext.Current.CancellationToken; + var uid = Guid.NewGuid(); + + // Store one recent + one 100-day-old row directly (the visibility window is + // about Timestamp, which the projection can't backdate). 
+ using (var doc = GetTenantedDocumentSession()) + { + doc.Store(new AuthAuditView + { + Id = Guid.NewGuid(), + Timestamp = DateTimeOffset.UtcNow, + Category = AuditCategories.Account, + EventType = AuditEvents.AccountActivated, + UserId = uid, + }); + doc.Store(new AuthAuditView + { + Id = Guid.NewGuid(), + Timestamp = DateTimeOffset.UtcNow.AddDays(-100), + Category = AuditCategories.Account, + EventType = AuditEvents.AccountDeactivated, + UserId = uid, + }); + await doc.SaveChangesAsync(ct); + } + + // Default window is 90 days: the recent row shows, the 100-day-old one is hidden. + var rows = await Client.GetFromJsonAsync>("/api/admin/audit?limit=1000", JsonOptions, ct); + Assert.NotNull(rows); + Assert.Contains(rows!, r => r.EventType == AuditEvents.AccountActivated); + Assert.DoesNotContain(rows!, r => r.EventType == AuditEvents.AccountDeactivated); + } +} diff --git a/src/dotnet/Modgud.Api.Tests/Audit/AuditErasureSurvivalTests.cs b/src/dotnet/Modgud.Api.Tests/Audit/AuditErasureSurvivalTests.cs new file mode 100644 index 00000000..9b4ff7c6 --- /dev/null +++ b/src/dotnet/Modgud.Api.Tests/Audit/AuditErasureSurvivalTests.cs @@ -0,0 +1,78 @@ +using Marten; +using Microsoft.Extensions.DependencyInjection; +using Modgud.Api.Tests.Infrastructure; +using Modgud.Authentication.Audit; +using Modgud.Authentication.Events; +using Modgud.Authentication.Gdpr; +using Modgud.Infrastructure.Audit; + +namespace Modgud.Api.Tests.Audit; + +/// +/// The load-bearing GDPR claim of the audit redesign: a permanently-erased user is +/// MASKED, not deleted — so their audit rows must SURVIVE, de-identified (Ip null), +/// and survive a full projection rebuild. Rebuild durability comes from +/// AuthAuditViewProjection.IncludeArchivedEvents = true (the masked events are +/// archived, not deleted); live freshness comes from the erase-time Ip refresh in +/// GdprService.PerformPermanentEraseAsync. See §A.4.2 of the design doc. 
+/// +[Collection(IntegrationTestCollection.Name)] +public class AuditErasureSurvivalTests : IntegrationTestBase +{ + public AuditErasureSurvivalTests(SharedPostgresFixture fixture) : base(fixture) { } + + [Fact] + public async Task Erased_user_audit_rows_survive_deidentified_across_rebuild() + { + var ct = TestContext.Current.CancellationToken; + var user = await Factory.CreateTestUserWithIdentityAsync("Erase", "Audit", "ea", "erase-audit@acme.com"); + + const string ip = "203.0.113.9"; + using (var scope = Factory.Services.CreateScope()) + { + var session = scope.ServiceProvider.GetRequiredService(); + session.Events.Append(user.Id, new UserLoggedInEvent(user.Id, ip, "password")); + await session.SaveChangesAsync(ct); + } + await RebuildAuthAuditAsync(ct); + + // Before erase: the login row carries the IP. + await using (var qs = GetTenantedSession()) + { + var before = await qs.Query().Where(r => r.UserId == user.Id).ToListAsync(ct); + Assert.Contains(before, r => r.EventType == AuditEvents.LoginSucceeded && r.Ip == ip); + } + + // GDPR permanent erase — masks + archives the user stream. + using (var scope = Factory.Services.CreateScope()) + { + var gdpr = scope.ServiceProvider.GetRequiredService(); + var r = await gdpr.PermanentlyEraseAsync(user.Id, adminUserId: null, reason: "audit-survival-test", ct); + Assert.False(r.IsError, r.IsError ? r.FirstError.Description : null); + } + + // Live view: rows SURVIVE, de-identified (Ip null) — NOT deleted. + await using (var qs = GetTenantedSession()) + { + var live = await qs.Query().Where(r => r.UserId == user.Id).ToListAsync(ct); + Assert.Contains(live, r => r.EventType == AuditEvents.LoginSucceeded); + Assert.All(live, r => Assert.Null(r.Ip)); + } + + // Durable across a full rebuild: IncludeArchivedEvents regenerates the rows + // from the masked archived events (still de-identified). 
+ await RebuildAuthAuditAsync(ct); + await using (var qs = GetTenantedSession()) + { + var afterRebuild = await qs.Query().Where(r => r.UserId == user.Id).ToListAsync(ct); + Assert.Contains(afterRebuild, r => r.EventType == AuditEvents.LoginSucceeded && r.Ip == null); + } + } + + private async Task RebuildAuthAuditAsync(CancellationToken ct) + { + var store = Factory.Services.GetRequiredService(); + using var daemon = await store.BuildProjectionDaemonAsync("system"); + await daemon.RebuildProjectionAsync(TimeSpan.FromMinutes(2), ct); + } +} diff --git a/src/dotnet/Modgud.Api.Tests/Audit/AuthAuditViewProjectionTests.cs b/src/dotnet/Modgud.Api.Tests/Audit/AuthAuditViewProjectionTests.cs new file mode 100644 index 00000000..45bac2c6 --- /dev/null +++ b/src/dotnet/Modgud.Api.Tests/Audit/AuthAuditViewProjectionTests.cs @@ -0,0 +1,81 @@ +using Marten; +using Microsoft.Extensions.DependencyInjection; +using Modgud.Api.Tests.Infrastructure; +using Modgud.Authentication.Audit; +using Modgud.Authentication.Events; +using Modgud.Infrastructure.Audit; + +namespace Modgud.Api.Tests.Audit; + +/// +/// Phase 0 scaffold proof: the AuthAuditViewProjection folds +/// user-stream auth/lifecycle events into flat, per-event AuthAuditView +/// rows carrying typed category + event-type + realm — without copying PII payloads. +/// Mirrors the explicit daemon-rebuild pattern from ProjectionRebuildTests +/// (MasterTableTenancy → build the daemon for the "system" realm DB).
+/// +[Collection(IntegrationTestCollection.Name)] +public class AuthAuditViewProjectionTests : IntegrationTestBase +{ + public AuthAuditViewProjectionTests(SharedPostgresFixture fixture) : base(fixture) { } + + [Fact] + public async Task Projects_user_stream_events_into_flat_typed_audit_rows() + { + var ct = TestContext.Current.CancellationToken; + var user = await Factory.CreateTestUserWithIdentityAsync("Audit", "Scaffold", "as", "audit-scaffold@acme.com"); + + const string ip = "203.0.113.7"; + using (var scope = Factory.Services.CreateScope()) + { + var session = scope.ServiceProvider.GetRequiredService(); + // A login marker (carries method + ip) + a password change, plus an + // aggregated failure-streak record — all on the user's stream. + session.Events.Append(user.Id, new UserLoggedInEvent(user.Id, ip, "password")); + session.Events.Append(user.Id, new UserPasswordChangedEvent(user.Id, null)); + session.Events.Append(user.Id, new UserLoginFailuresObservedEvent(user.Id, 3, DateTimeOffset.UtcNow)); + await session.SaveChangesAsync(ct); + } + + await RebuildAuthAuditAsync(ct); + + await using var qs = GetTenantedSession(); + var rows = await qs.Query().Where(r => r.UserId == user.Id).ToListAsync(ct); + + Assert.NotEmpty(rows); + + // The login marker projects to an authentication row that keeps the method + IP. + Assert.Contains(rows, r => + r.EventType == AuditEvents.LoginSucceeded && + r.Category == AuditCategories.Authentication && + r.Ip == ip && + r.Method == "password" && + r.UserId == user.Id); + + // The aggregated failure streak projects with its count (Decision (b)). + Assert.Contains(rows, r => + r.EventType == AuditEvents.LoginFailuresObserved && + r.Category == AuditCategories.Authentication && + r.Count == 3); + + // The password change projects to an account-category row. 
+ Assert.Contains(rows, r => + r.EventType == AuditEvents.AccountPasswordChanged && + r.Category == AuditCategories.Account); + + // User creation already produced account-lifecycle rows on the stream. + Assert.Contains(rows, r => r.Category == AuditCategories.Account); + + // Per-tenant view: every row is realm-tagged (here, the system realm). + Assert.All(rows, r => Assert.False(string.IsNullOrEmpty(r.Realm))); + } + + private async Task RebuildAuthAuditAsync(CancellationToken ct) + { + // MasterTableTenancy disables the default tenant — build the daemon for the + // "system" realm DB explicitly (mirrors ProjectionRebuildTests / RecoveryCli). + var store = Factory.Services.GetRequiredService(); + using var daemon = await store.BuildProjectionDaemonAsync("system"); + await daemon.RebuildProjectionAsync(TimeSpan.FromMinutes(2), ct); + } +} diff --git a/src/dotnet/Modgud.Api.Tests/Audit/LoginFailureStreakEmissionTests.cs b/src/dotnet/Modgud.Api.Tests/Audit/LoginFailureStreakEmissionTests.cs new file mode 100644 index 00000000..cb6d41c5 --- /dev/null +++ b/src/dotnet/Modgud.Api.Tests/Audit/LoginFailureStreakEmissionTests.cs @@ -0,0 +1,54 @@ +using Marten; +using Microsoft.AspNetCore.Identity; +using Microsoft.Extensions.DependencyInjection; +using Modgud.Api.Tests.Infrastructure; +using Modgud.Authentication.Domain; +using Modgud.Authentication.Events; + +namespace Modgud.Api.Tests.Audit; + +/// +/// Proves the Phase 1 emission logic in EventSourcedUserStore.AppendSecurityChangeEvents: +/// a known-user failure streak is recorded as exactly ONE aggregated +/// UserLoginFailuresObservedEvent when the access-failed counter +/// resolves (>0 → 0), not one event per attempt (Decision (b)).
+/// +[Collection(IntegrationTestCollection.Name)] +public class LoginFailureStreakEmissionTests : IntegrationTestBase +{ + public LoginFailureStreakEmissionTests(SharedPostgresFixture fixture) : base(fixture) { } + + [Fact] + public async Task Resolving_a_failure_streak_emits_one_aggregated_event_with_the_count() + { + var ct = TestContext.Current.CancellationToken; + var user = await Factory.CreateTestUserWithIdentityAsync("Streak", "Resolver", "sr", "streak@acme.com"); + + using (var scope = Factory.Services.CreateScope()) + { + var userManager = scope.ServiceProvider.GetRequiredService>(); + var u = await userManager.FindByIdAsync(user.Id.ToString()); + Assert.NotNull(u); + + // Three failed attempts (below the lockout threshold): the counter goes + // 0 → 3. No event yet — failures are NOT recorded per attempt. + await userManager.AccessFailedAsync(u!); + await userManager.AccessFailedAsync(u!); + await userManager.AccessFailedAsync(u!); + + // The streak resolves (what a successful sign-in does): 3 → 0 → ONE event. 
+ await userManager.ResetAccessFailedCountAsync(u!); + } + + await using var qs = GetTenantedSession(); + var stream = await qs.Events.FetchStreamAsync(user.Id, token: ct); + var observed = stream + .Select(e => e.Data) + .OfType() + .ToList(); + + var ev = Assert.Single(observed); + Assert.Equal(3, ev.FailedCount); + Assert.Equal(user.Id, ev.UserId); + } +} diff --git a/src/dotnet/Modgud.Api.Tests/Audit/SecurityAuditStoreTests.cs b/src/dotnet/Modgud.Api.Tests/Audit/SecurityAuditStoreTests.cs new file mode 100644 index 00000000..460ca26e --- /dev/null +++ b/src/dotnet/Modgud.Api.Tests/Audit/SecurityAuditStoreTests.cs @@ -0,0 +1,120 @@ +using Marten; +using Microsoft.Extensions.DependencyInjection; +using Modgud.Api.Tests.Infrastructure; +using Modgud.Authentication.Gdpr; +using Modgud.Infrastructure.Audit; + +namespace Modgud.Api.Tests.Audit; + +/// +/// The streamless security/ops store (logging/audit redesign Track A, §A.5): +/// records about UNidentified actors + operational actions, in the system DB under +/// Art. 6(1)(f) legitimate interest. Two load-bearing claims are tested here: +/// (1) these records are NOT in the per-subject GDPR-erase path — they rely on the +/// short retention window, not erasure (Open Decision #4 = time-expiry only); and +/// (2) clearing the log is itself audited (audit-of-the-audit) with the operator's +/// identity. The control-plane test admin sees + clears the full cross-realm log. +/// +[Collection(IntegrationTestCollection.Name)] +public class SecurityAuditStoreTests : IntegrationTestBase +{ + public SecurityAuditStoreTests(SharedPostgresFixture fixture) : base(fixture) { } + + [Fact] + public async Task Streamless_record_survives_user_permanent_erase() + { + var ct = TestContext.Current.CancellationToken; + + // A registered user whose email also appears as the ATTEMPTED actor on a + // pre-registration failed-login row in the streamless store. 
+ const string email = "boundary-victim@acme.com"; + var user = await Factory.CreateTestUserWithIdentityAsync("Boundary", "Victim", "bv", email); + + var rowId = Guid.NewGuid(); + await using (var write = GetTenantedDocumentSession("system")) + { + write.Store(new SecurityAuditEntry + { + Id = rowId, + Timestamp = DateTimeOffset.UtcNow, + Level = "Warning", + EventType = AuditEvents.LoginFailedUnknownUser, + Actor = email, + Ip = "203.0.113.50", + Realm = "system", + Message = $"Login failed for {email} — user not found or inactive", + }); + await write.SaveChangesAsync(ct); + } + + // Permanent-erase the user. The streamless store has no user stream to attach + // to and is deliberately OUTSIDE the per-subject erase path. + using (var scope = Factory.Services.CreateScope()) + { + var gdpr = scope.ServiceProvider.GetRequiredService(); + var r = await gdpr.PermanentlyEraseAsync(user.Id, adminUserId: null, reason: "streamless-boundary-test", ct); + Assert.False(r.IsError, r.IsError ? r.FirstError.Description : null); + } + + // The streamless record SURVIVES the erase (it expires only via retention). + await using (var read = GetTenantedDocumentSession("system")) + { + var survived = await read.LoadAsync(rowId, ct); + Assert.NotNull(survived); + Assert.Equal(email, survived!.Actor); + } + } + + [Fact] + public async Task Clear_is_audited_with_the_operator_identity() + { + var ct = TestContext.Current.CancellationToken; + + // Something to clear. + await using (var write = GetTenantedDocumentSession("system")) + { + write.Store(new SecurityAuditEntry + { + Id = Guid.NewGuid(), + Timestamp = DateTimeOffset.UtcNow, + Level = "Warning", + EventType = AuditEvents.LoginFailedUnknownUser, + Actor = "to-be-cleared", + Realm = "system", + Message = "seed row for clear test", + }); + await write.SaveChangesAsync(ct); + } + + // Control-plane admin clears the full cross-realm log. 
+ var resp = await Client.DeleteAsync("/api/admin/auth-log", ct); + resp.EnsureSuccessStatusCode(); + + // The clear emits a typed audit.log_cleared record AFTER the wipe (the + // forensic trail of who cleared what). It rides the best-effort async writer, + // so poll briefly for it to land. + var cleared = await PollForAsync( + r => r.EventType == AuditEvents.AuditLogCleared, ct); + + Assert.NotNull(cleared); + Assert.Equal("cleared", cleared!.Status); + Assert.False(string.IsNullOrEmpty(cleared.Actor)); + Assert.NotEqual("(unknown)", cleared.Actor); + } + + private async Task PollForAsync( + Func predicate, CancellationToken ct) + { + for (var i = 0; i < 25; i++) + { + await using (var read = GetTenantedDocumentSession("system")) + { + var hit = (await read.Query().ToListAsync(ct)) + .FirstOrDefault(predicate); + if (hit is not null) return hit; + } + await Task.Delay(200, ct); + } + return null; + } +} diff --git a/src/dotnet/Modgud.Api.Tests/Authorization/AuthLogTenantVisibilityTests.cs b/src/dotnet/Modgud.Api.Tests/Authorization/AuthLogTenantVisibilityTests.cs index bb218bdf..8445af48 100644 --- a/src/dotnet/Modgud.Api.Tests/Authorization/AuthLogTenantVisibilityTests.cs +++ b/src/dotnet/Modgud.Api.Tests/Authorization/AuthLogTenantVisibilityTests.cs @@ -1,50 +1,57 @@ using System.Net.Http.Json; using Modgud.Api.Tests.Infrastructure; -using Modgud.Authentication.AuthLog; -using Marten; +using Modgud.Infrastructure.Audit; namespace Modgud.Api.Tests.Authorization; /// -/// Auth-log entries are persisted to the system DB and attributed to a realm. -/// The read endpoint reaches that system DB and returns the realm field; the -/// control-plane (system) realm — which the default test admin runs in — sees -/// the full cross-realm log. 
The per-realm exclusion of the filter itself is -/// unit-tested deterministically in -/// AuthLogAttributionTests.Scope_TenantRealm_SeesOnlyOwnRealm (a tenant -/// realm-admin authenticated request needs full multi-realm host routing + a -/// per-tenant login, out of proportion for this trivial Where). +/// Streamless security-store entries live in the system DB and are attributed to a +/// realm. The read endpoint (GET /api/admin/auth-log) reaches that system DB +/// and returns the realm field; the control-plane (system) realm — which the default +/// test admin runs in — sees the full cross-realm log INCLUDING control-plane-only +/// (PlatformOnly) operational rows. The per-realm + tenant-visibility exclusion +/// of the filter itself is unit-tested deterministically in +/// AuthLogAttributionTests (a tenant realm-admin authenticated request needs +/// full multi-realm host routing + a per-tenant login, out of proportion here). /// [Collection(IntegrationTestCollection.Name)] public class AuthLogTenantVisibilityTests : IntegrationTestBase { public AuthLogTenantVisibilityTests(SharedPostgresFixture fixture) : base(fixture) { } + private sealed record Row(string Message, string? Realm); + [Fact] - public async Task Read_AsControlPlaneAdmin_ReturnsAllRealms_WithRealmField() + public async Task Read_AsControlPlaneAdmin_ReturnsAllRealms_IncludingPlatformOnly() { + var ct = TestContext.Current.CancellationToken; + // Entries live in the system DB regardless of which realm emitted them. 
await using (var write = GetTenantedDocumentSession("system")) { - write.Store(new AuthLogDocument { Timestamp = DateTimeOffset.UtcNow, Level = "Info", Message = "sk-vis-system", Realm = "system" }); - write.Store(new AuthLogDocument { Timestamp = DateTimeOffset.UtcNow, Level = "Info", Message = "sk-vis-acme", Realm = "acme" }); - write.Store(new AuthLogDocument { Timestamp = DateTimeOffset.UtcNow, Level = "Info", Message = "sk-vis-unattributed", Realm = null }); - await write.SaveChangesAsync(TestContext.Current.CancellationToken); + write.Store(new SecurityAuditEntry { Timestamp = DateTimeOffset.UtcNow, Level = "Info", EventType = AuditEvents.LoginFailedUnknownUser, Message = "sk-vis-system", Realm = "system", PlatformOnly = false }); + write.Store(new SecurityAuditEntry { Timestamp = DateTimeOffset.UtcNow, Level = "Info", EventType = AuditEvents.LoginFailedUnknownUser, Message = "sk-vis-acme", Realm = "acme", PlatformOnly = false }); + write.Store(new SecurityAuditEntry { Timestamp = DateTimeOffset.UtcNow, Level = "Info", EventType = AuditEvents.LoginFailedUnknownUser, Message = "sk-vis-unattributed", Realm = null, PlatformOnly = false }); + // A control-plane-only operational row — visible to the control-plane reader. + write.Store(new SecurityAuditEntry { Timestamp = DateTimeOffset.UtcNow, Level = "Warning", EventType = AuditEvents.RecoveryCliInvoked, Message = "sk-vis-platform", Realm = "acme", PlatformOnly = true }); + await write.SaveChangesAsync(ct); } // The default Client is a realm-admin in the system (control-plane) realm. - var entries = await Client.GetFromJsonAsync>( - "/api/admin/auth-log?limit=500", TestContext.Current.CancellationToken); + var entries = await Client.GetFromJsonAsync>( + "/api/admin/auth-log?limit=500", ct); Assert.NotNull(entries); var byMessage = entries! .Where(e => e.Message.StartsWith("sk-vis-")) .ToDictionary(e => e.Message, e => e.Realm); - // Control-plane sees its own realm AND other realms AND unattributed events. 
+ // Control-plane sees its own realm AND other realms AND unattributed events AND + // control-plane-only operational rows. Assert.Equal("system", byMessage["sk-vis-system"]); Assert.Equal("acme", byMessage["sk-vis-acme"]); Assert.True(byMessage.ContainsKey("sk-vis-unattributed")); Assert.Null(byMessage["sk-vis-unattributed"]); + Assert.True(byMessage.ContainsKey("sk-vis-platform")); // PlatformOnly row visible to control-plane } } diff --git a/src/dotnet/Modgud.Api.Tests/Modgud.Api.Tests.csproj b/src/dotnet/Modgud.Api.Tests/Modgud.Api.Tests.csproj index 7ac7f937..12798958 100644 --- a/src/dotnet/Modgud.Api.Tests/Modgud.Api.Tests.csproj +++ b/src/dotnet/Modgud.Api.Tests/Modgud.Api.Tests.csproj @@ -19,6 +19,8 @@ + + + + PreserveNewest + + + PreserveNewest + diff --git a/src/dotnet/Modgud.Api.Tests/Observability/OtelLogsRedactionTests.cs b/src/dotnet/Modgud.Api.Tests/Observability/OtelLogsRedactionTests.cs new file mode 100644 index 00000000..488aa55d --- /dev/null +++ b/src/dotnet/Modgud.Api.Tests/Observability/OtelLogsRedactionTests.cs @@ -0,0 +1,199 @@ +using DotNet.Testcontainers.Builders; +using DotNet.Testcontainers.Containers; +using Serilog; +using Serilog.Sinks.OpenTelemetry; + +namespace Modgud.Api.Tests.Observability; + +/// +/// Phase 4 (§B.2) — the redaction GUARANTEE is proven end-to-end against a real +/// OTel Collector, not a config unit test. A log carrying PII is emitted through +/// the SAME Serilog → OTLP sink the app uses, into a collector running the SAME +/// redaction processor that ships, and the exported output is asserted to be +/// scrubbed before it would ever reach OpenObserve. +/// +/// If the redaction processor is removed or misconfigured, this test fails — it +/// is the executable form of the "operationally conditional" guarantee. +/// +/// Readback is the collector's debug exporter (stdout via GetLogsAsync): no bind +/// mount / writable volume, so it is portable across Windows/arm64 dev and the +/// amd64 CI runner. 
+/// +public class OtelLogsRedactionTests +{ + // arm64-safe contrib tag (the transform/OTTL processor is contrib-only). + private const string CollectorImage = "otel/opentelemetry-collector-contrib:0.153.0"; + + private static string TestConfigPath => + Path.Combine(AppContext.BaseDirectory, "Observability", "otel-collector-test-config.yaml"); + + private static string ShippedConfigPath => + Path.Combine(AppContext.BaseDirectory, "Observability", "otel-collector-config.shipped.yaml"); + + private const string RulesetStart = "# >>> redaction-ruleset: v2"; + private const string RulesetEnd = "# >>> end redaction-ruleset: v2 <<<"; + + /// + /// Anti-drift: the redaction block the e2e test exercises must be the one + /// that ships. Cheap, no Docker — guards against the test passing on a stale + /// copy of the rules. + /// + [Fact] + public void RedactionRuleset_TestConfig_MatchesShipped() + { + var testBlock = ExtractRuleset(File.ReadAllText(TestConfigPath)); + var shippedBlock = ExtractRuleset(File.ReadAllText(ShippedConfigPath)); + + Assert.False(string.IsNullOrWhiteSpace(testBlock), "test config has no redaction-ruleset block"); + Assert.Equal(shippedBlock, testBlock); + } + + /// + /// The block match above pins the processor DEFINITION; this pins the + /// shipped pipeline WIRING. Without it, dropping transform/redaction from the + /// shipped logs pipeline would leave both other tests green while a real + /// collector exported PII un-redacted. 
+ /// + [Fact] + public void ShippedPipeline_WiresRedactionBeforeExport() + { + var shipped = File.ReadAllText(ShippedConfigPath); + var procIdx = shipped.IndexOf("processors: [", StringComparison.Ordinal); + Assert.True(procIdx >= 0, "shipped config has no pipeline processors list"); + + var listEnd = shipped.IndexOf(']', procIdx); + Assert.True(listEnd > procIdx, "malformed processors list in shipped config"); + var procList = shipped.Substring(procIdx, listEnd - procIdx); + + var redactionIdx = procList.IndexOf("transform/redaction", StringComparison.Ordinal); + var batchIdx = procList.IndexOf("batch", StringComparison.Ordinal); + Assert.True(redactionIdx >= 0, "shipped logs pipeline does not wire the redaction processor"); + Assert.True(batchIdx < 0 || redactionIdx < batchIdx, + "redaction must run before batch/export in the shipped logs pipeline"); + } + + [Fact] + public async Task PiiInLogs_IsRedactedByCollector_BeforeExport() + { + // PII samples — distinct, recognisable values we can assert disappeared. + const string email = "john.doe@example.com"; + const string ipv4 = "203.0.113.45"; + const string ipv6 = "2001:db8::1"; // ::-compressed (leading group) + const string ipv6Loopback = "::1"; // leading-:: form + const string jwt = "eyJhbGciOiJIUzI1.eyJzdWIiOiIxMjM0NTY3.SflKxwRJSMeKK"; + const string bodyCred = "abc.def123"; // becomes "bearer abc.def123" in the body + const string attrCred = "Bearer xyz.tok456"; // a credential carried in an attribute + const string username = "bob_smith"; // non-email login id: in the body ("User=...") AND the UserName attribute + const string timestamp = "12:34:56"; // must SURVIVE (not an IP) + const string realm = "acme"; // must SURVIVE (the realm tag) + const string serviceVersion = "1.0.0.0"; // resource attr, must SURVIVE + + await using var collector = new ContainerBuilder() + .WithImage(CollectorImage) + // Copy the config in via the Docker API (no host bind mount). 
+ .WithResourceMapping(File.ReadAllBytes(TestConfigPath), "/etc/otelcol-contrib/config.yaml") + .WithPortBinding(4317, true) + .WithWaitStrategy(Wait.ForUnixContainer().UntilMessageIsLogged("Everything is ready")) + .Build(); + + await collector.StartAsync(); + var otlpEndpoint = $"http://localhost:{collector.GetMappedPublicPort(4317)}"; + + // Emit through the REAL sink: Serilog → OTLP gRPC → collector. The + // resource attributes mirror Program.cs (incl. service.instance.id); the + // Realm/UserName properties mirror what the app stamps. + using (var logger = new LoggerConfiguration() + .MinimumLevel.Information() + .WriteTo.OpenTelemetry(o => + { + o.Endpoint = otlpEndpoint; + o.Protocol = OtlpProtocol.Grpc; + o.ResourceAttributes = new Dictionary + { + ["service.name"] = "modgud-redaction-test", + ["service.version"] = serviceVersion, + ["service.instance.id"] = Environment.MachineName, + }; + }) + .CreateLogger()) + { + logger + .ForContext("Realm", realm) + .ForContext("Email", email) + .ForContext("ClientIp", ipv4) + .ForContext("V6", ipv6) + .ForContext("V6Loopback", ipv6Loopback) + .ForContext("Authorization", attrCred) // redacted by the attr bearer rule + // {UserName} renders into the "User=" body (body rule) and also + // becomes the UserName attribute (dropped by delete_key). 
+ .Information( + "Login failed for {Email} from {ClientIp} bearer {Cred} jwt {Jwt} at {Time} v6 {V6} lo {V6Lo} User={UserName}", + email, ipv4, bodyCred, jwt, timestamp, ipv6, ipv6Loopback, username); + } // dispose flushes the OTLP sink + + var exported = await PollForExportedRecordAsync(collector); + + // --- PII must be gone (body AND attributes) --- + Assert.DoesNotContain(email, exported); + Assert.DoesNotContain(ipv4, exported); + Assert.DoesNotContain(ipv6, exported); + Assert.DoesNotContain(ipv6Loopback, exported); // leading-:: form covered + Assert.DoesNotContain(jwt, exported); + Assert.DoesNotContain("bearer " + bodyCred, exported); + Assert.DoesNotContain("xyz.tok456", exported); // credential in an attribute + Assert.DoesNotContain(username, exported); // username: body "User=" + UserName attribute + + // --- redaction markers must be present --- + Assert.Contains("[REDACTED_EMAIL]", exported); + Assert.Contains("[REDACTED_IP]", exported); + Assert.Contains("[REDACTED_TOKEN]", exported); + Assert.Contains("[REDACTED_AUTHORIZATION]", exported); + Assert.Contains("[REDACTED_USER]", exported); + + // --- non-PII must survive (no over-redaction) --- + Assert.Contains(timestamp, exported); // HH:MM:SS is not an IP + Assert.Contains(realm, exported); // realm tag travels through + Assert.Contains(serviceVersion, exported); // service.version not nuked as IPv4 + } + + /// + /// Sink dispose flushed; the collector receives over gRPC, batches (1s) and + /// prints the post-redaction record to stdout via the debug exporter. Poll + /// the container logs until that record appears. + /// + private static async Task PollForExportedRecordAsync(IContainer collector) + { + var deadline = DateTime.UtcNow.AddSeconds(30); + while (DateTime.UtcNow < deadline) + { + var (stdout, stderr) = await collector.GetLogsAsync(); + var combined = stdout + "\n" + stderr; + // The debug exporter prints "Body: Str(...)" only for an exported record. 
+ if (combined.Contains("Body: Str(")) + { + return combined; + } + + await Task.Delay(500); + } + + var (finalOut, finalErr) = await collector.GetLogsAsync(); + throw new Xunit.Sdk.XunitException( + "Collector exported no log record within 30s.\n--- stdout ---\n" + finalOut + + "\n--- stderr ---\n" + finalErr); + } + + private static string ExtractRuleset(string yaml) + { + var start = yaml.IndexOf(RulesetStart, StringComparison.Ordinal); + var end = yaml.IndexOf(RulesetEnd, StringComparison.Ordinal); + if (start < 0 || end < 0) + { + return string.Empty; + } + + // Normalise line endings so a CRLF/LF checkout difference between the two + // files can't fail an otherwise-identical block. + return yaml.Substring(start, end - start + RulesetEnd.Length).Replace("\r\n", "\n"); + } +} diff --git a/src/dotnet/Modgud.Api.Tests/Observability/otel-collector-test-config.yaml b/src/dotnet/Modgud.Api.Tests/Observability/otel-collector-test-config.yaml new file mode 100644 index 00000000..5e198676 --- /dev/null +++ b/src/dotnet/Modgud.Api.Tests/Observability/otel-collector-test-config.yaml @@ -0,0 +1,104 @@ +# Test-only OTel Collector config for OtelLogsRedactionTests. +# +# Identical to the shipped docker/otel-collector/otel-collector-config.yaml in +# its redaction processor (the block between the redaction-ruleset sentinels is +# asserted byte-for-byte equal by RedactionRuleset_TestConfig_MatchesShipped), +# so the rules under test never drift from the ones that ship. The ONLY +# difference is the exporter: a debug exporter the test reads back from stdout, +# instead of OpenObserve. + +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + # >>> redaction-ruleset: v2 — BUMP THE VERSION IF YOU EDIT THIS BLOCK <<< + # + # The exact PII field set the guarantee targets. 
Versioned because the + # guarantee is only as good as this block (§B.2): the end-to-end test + # (Modgud.Api.Tests/Observability/OtelLogsRedactionTests) pins it against a + # real collector, and this version string is what an operator audits. The test + # also asserts this block matches the test collector's copy verbatim AND that + # the shipped pipeline actually wires this processor before export, so the rules + # under test never drift from the ones that ship. + # + # Scope: the log BODY (rendered message) and every top-level string ATTRIBUTE + # value (Serilog properties land here). Resource attributes (service.version, + # ...) are deliberately NOT touched — e.g. service.version "1.0.0.0" must not be + # mistaken for an IPv4 address. + # + # Known limits (must be masked at the call site if they can carry PII): + # - replace_all_patterns(value) does NOT recurse into nested Map/Slice + # attribute values, so destructured objects ({@...}) / logged collections + # are out of scope here — mask those at the source. + # - a bare secret with no adjacent bearer/basic keyword has no value shape; + # it is caught in the body only via the scheme, not as an attribute value. + # - usernames have no value shape: the known UserName/Actor carrier attributes + # are dropped and the "User=" body form is masked, but a login identifier + # inlined into other free-text prose relies on the call site not logging it + # raw (log user.Id instead). + # - error_mode: ignore -> a per-statement runtime error skips THAT statement + # (best-effort, Track B). The statements are simple guarded regex replaces, + # so the leak surface is narrow; do NOT switch to propagate without analysis + # (propagate drops the whole payload on any error). 
+ # + # v2 targets (regexes are single-quoted YAML so backslashes are literal): + # - email addresses -> [REDACTED_EMAIL] + # - JWTs (eyJ.x.y) -> [REDACTED_TOKEN] + # - Bearer/Basic creds -> [REDACTED_AUTHORIZATION] + # - IPv4 addresses -> [REDACTED_IP] + # - IPv6 addresses -> [REDACTED_IP] (full, ::-compressed, and leading-::) + # - usernames -> UserName/Actor attrs dropped; "User=" body masked + transform/redaction: + error_mode: ignore + log_statements: + - context: log + statements: + # --- log body (rendered message), only when it is a string --- + - 'replace_pattern(log.body, "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}", "[REDACTED_EMAIL]") where IsString(log.body)' + - 'replace_pattern(log.body, "eyJ[A-Za-z0-9_-]+\\.[A-Za-z0-9_-]+\\.[A-Za-z0-9_-]+", "[REDACTED_TOKEN]") where IsString(log.body)' + - 'replace_pattern(log.body, "(?i)(?:bearer|basic)\\s+[A-Za-z0-9._~+/-]+=*", "[REDACTED_AUTHORIZATION]") where IsString(log.body)' + - 'replace_pattern(log.body, "\\b(?:[0-9]{1,3}\\.){3}[0-9]{1,3}\\b", "[REDACTED_IP]") where IsString(log.body)' + - 'replace_pattern(log.body, "(?:[0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}", "[REDACTED_IP]") where IsString(log.body)' + - 'replace_pattern(log.body, "(?:[0-9A-Fa-f]{1,4}:){1,7}:[0-9A-Fa-f]{0,4}", "[REDACTED_IP]") where IsString(log.body)' + - 'replace_pattern(log.body, "::[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*", "[REDACTED_IP]") where IsString(log.body)' + - 'replace_pattern(log.body, "(?i)\\bUser=[^\\s,;]+", "User=[REDACTED_USER]") where IsString(log.body)' + # --- all top-level string attribute values (Serilog properties) --- + - 'replace_all_patterns(log.attributes, "value", "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}", "[REDACTED_EMAIL]")' + - 'replace_all_patterns(log.attributes, "value", "eyJ[A-Za-z0-9_-]+\\.[A-Za-z0-9_-]+\\.[A-Za-z0-9_-]+", "[REDACTED_TOKEN]")' + - 'replace_all_patterns(log.attributes, "value", "(?i)(?:bearer|basic)\\s+[A-Za-z0-9._~+/-]+=*", "[REDACTED_AUTHORIZATION]")' + - 
'replace_all_patterns(log.attributes, "value", "\\b(?:[0-9]{1,3}\\.){3}[0-9]{1,3}\\b", "[REDACTED_IP]")' + - 'replace_all_patterns(log.attributes, "value", "(?:[0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}", "[REDACTED_IP]")' + - 'replace_all_patterns(log.attributes, "value", "(?:[0-9A-Fa-f]{1,4}:){1,7}:[0-9A-Fa-f]{0,4}", "[REDACTED_IP]")' + - 'replace_all_patterns(log.attributes, "value", "::[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*", "[REDACTED_IP]")' + # usernames have no value shape -> drop the known carrier attributes. + - 'delete_key(log.attributes, "UserName")' + - 'delete_key(log.attributes, "Actor")' + # >>> end redaction-ruleset: v2 <<< + + batch: + timeout: 1s + send_batch_size: 1 + send_batch_max_size: 1 + +exporters: + # Readback for the test: the post-redaction records are printed to the + # collector's stdout (verbosity: detailed), which the test reads back via + # GetLogsAsync. No bind mount / writable volume needed (the distroless image + # has no /tmp and Windows host bind mounts are unreliable under Testcontainers). + debug: + verbosity: detailed + +service: + pipelines: + logs: + receivers: [otlp] + processors: [transform/redaction, batch] + exporters: [debug] + telemetry: + logs: + level: info diff --git a/src/dotnet/Modgud.Api/ExtensionMethods/ObservabilityExtensions.cs b/src/dotnet/Modgud.Api/ExtensionMethods/ObservabilityExtensions.cs index 8c150dc7..d1acfb31 100644 --- a/src/dotnet/Modgud.Api/ExtensionMethods/ObservabilityExtensions.cs +++ b/src/dotnet/Modgud.Api/ExtensionMethods/ObservabilityExtensions.cs @@ -37,6 +37,19 @@ public static IServiceCollection AddModgudObservability( ObservabilitySettings settings, string? postgresConnectionString) { + // OTLP exporters speak HTTP/2 (gRPC always; HttpProtobuf negotiates it). 
+ // Against a plaintext http:// collector that means HTTP/2 cleartext (h2c), + // which .NET disables by default — without this switch the metrics/traces + // exporter hangs on connection setup and every export times out after 10s + // (the log sink is unaffected: it uses its own HTTP/1.1 client). A TLS + // (https) endpoint negotiates HTTP/2 natively and needs no switch. This is + // the documented OTel-on-.NET requirement for insecure OTLP endpoints. + if (settings.Otlp.Enabled && + settings.Otlp.Endpoint.StartsWith("http://", StringComparison.OrdinalIgnoreCase)) + { + AppContext.SetSwitch("System.Net.Http.SocketsHttpHandler.Http2UnencryptedSupport", true); + } + var resourceBuilder = ResourceBuilder.CreateDefault() .AddService( serviceName: settings.ServiceName, @@ -63,7 +76,7 @@ public static IServiceCollection AddModgudObservability( if (settings.Otlp.Enabled) { - metrics.AddOtlpExporter(ConfigureOtlp(settings.Otlp)); + metrics.AddOtlpExporter(ConfigureOtlp(settings.Otlp, "v1/metrics")); } }) .WithTracing(tracing => @@ -98,7 +111,7 @@ public static IServiceCollection AddModgudObservability( if (settings.Otlp.Enabled) { - tracing.AddOtlpExporter(ConfigureOtlp(settings.Otlp)); + tracing.AddOtlpExporter(ConfigureOtlp(settings.Otlp, "v1/traces")); } }); @@ -131,14 +144,22 @@ public static IServiceCollection AddModgudObservability( return services; } - private static Action ConfigureOtlp(ObservabilitySettings.OtlpSettings otlp) + private static Action ConfigureOtlp( + ObservabilitySettings.OtlpSettings otlp, string signalPath) { + var isHttp = otlp.Protocol.Equals("HttpProtobuf", StringComparison.OrdinalIgnoreCase); return options => { - options.Endpoint = new Uri(otlp.Endpoint); - options.Protocol = otlp.Protocol.Equals("HttpProtobuf", StringComparison.OrdinalIgnoreCase) - ? OtlpExportProtocol.HttpProtobuf - : OtlpExportProtocol.Grpc; + options.Protocol = isHttp ? 
OtlpExportProtocol.HttpProtobuf : OtlpExportProtocol.Grpc; + + // Setting Endpoint explicitly disables the SDK's automatic per-signal + // path append (AppendSignalPathToEndpoint), so under HttpProtobuf we must + // include /v1/ ourselves or the exporter POSTs to the bare host + // and gets a 404. gRPC ignores the path (fixed service method), so the + // bare endpoint is correct there. + options.Endpoint = isHttp + ? new Uri($"{otlp.Endpoint.TrimEnd('/')}/{signalPath}") + : new Uri(otlp.Endpoint); }; } diff --git a/src/dotnet/Modgud.Api/Features/Admin/AdminObservabilityEndpoints.cs b/src/dotnet/Modgud.Api/Features/Admin/AdminObservabilityEndpoints.cs index a29421b6..8b547932 100644 --- a/src/dotnet/Modgud.Api/Features/Admin/AdminObservabilityEndpoints.cs +++ b/src/dotnet/Modgud.Api/Features/Admin/AdminObservabilityEndpoints.cs @@ -88,6 +88,33 @@ public static WebApplication MapAdminObservabilityEndpoints(this WebApplication }) .WithName("Admin_Observability_Activity"); + // GET /api/admin/observability/errors?limit=50 + // Phase 5 (§B.3) — recent operational errors for the caller's realm, + // newest-first, from the per-realm bounded ring. The initial snapshot + // for the live error panel; the SignalR LogsSubscribe stream pushes + // subsequent entries. Realm-scoped via TenantContext (physical scope — + // each realm reads only its own ring). + group.MapGet("errors", ( + RealmErrorBuffer errorBuffer, + int? limit) => + { + var realm = TenantContext.Current; + var take = Math.Clamp(limit ?? 
50, 1, 200); + var entries = errorBuffer.GetRecent(realm, take); // already newest-first + var ordered = entries.Select(e => new + { + Timestamp = e.Timestamp, + Realm = e.Realm, + Level = e.Level, + Message = e.Message, + Exception = e.Exception, + SourceContext = e.SourceContext, + TraceId = e.TraceId, + }); + return Results.Ok(ordered); + }) + .WithName("Admin_Observability_Errors"); + return app; } } diff --git a/src/dotnet/Modgud.Api/Features/Admin/Jobs/AccountLifecycleSweepJob.cs b/src/dotnet/Modgud.Api/Features/Admin/Jobs/AccountLifecycleSweepJob.cs index e0941667..659e8e47 100644 --- a/src/dotnet/Modgud.Api/Features/Admin/Jobs/AccountLifecycleSweepJob.cs +++ b/src/dotnet/Modgud.Api/Features/Admin/Jobs/AccountLifecycleSweepJob.cs @@ -3,6 +3,7 @@ using Microsoft.Extensions.Logging; using Modgud.Authentication.Gdpr; using Modgud.Domain.Realms; +using Modgud.Infrastructure.Audit; using Modgud.Infrastructure.Persistence.Tenancy; using Quartz; @@ -30,6 +31,7 @@ namespace Modgud.Api.Features.Admin.Jobs; public class AccountLifecycleSweepJob( IServiceScopeFactory scopeFactory, IDocumentStore store, + ISecurityAuditLog securityAudit, ILogger logger) : IJob { public const string Key = "account-lifecycle-sweep"; @@ -70,16 +72,22 @@ public async Task Execute(IJobExecutionContext context) totalErased += erased; totalPurged += purged; if (reminded + erased + purged > 0) - logger.LogInformation( - "Auth: Account-lifecycle sweep — Realm={Realm} Reminded={Reminded} SelfErased={Erased} AutoPurged={Purged}", - realm.Slug, reminded, erased, purged); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.AccountLifecycleSwept, + Level = "Info", + Realm = realm.Slug, + Status = "swept", + Reason = $"reminded={reminded} selfErased={erased} autoPurged={purged}", + Message = $"Account-lifecycle sweep — Realm={realm.Slug} Reminded={reminded} SelfErased={erased} AutoPurged={purged}", + }); } realmsTouched++; } catch (Exception ex) { logger.LogError(ex, - "Auth: 
Account-lifecycle sweep failed for realm {Realm}", realm.Slug); + "Account-lifecycle sweep failed for realm {Realm}", realm.Slug); } } diff --git a/src/dotnet/Modgud.Api/Features/Admin/Jobs/DcrGcJob.cs b/src/dotnet/Modgud.Api/Features/Admin/Jobs/DcrGcJob.cs index f307bf22..d02579fa 100644 --- a/src/dotnet/Modgud.Api/Features/Admin/Jobs/DcrGcJob.cs +++ b/src/dotnet/Modgud.Api/Features/Admin/Jobs/DcrGcJob.cs @@ -7,6 +7,7 @@ using Modgud.Application.Scheduling; using Modgud.Domain.OAuth.Applications; using Modgud.Domain.Realms; +using Modgud.Infrastructure.Audit; using RealmSettingsDoc = Modgud.Domain.RealmSettings.RealmSettings; namespace Modgud.Api.Features.Admin.Jobs; @@ -29,7 +30,7 @@ namespace Modgud.Api.Features.Admin.Jobs; [DisallowConcurrentExecution] public class DcrGcJob( IServiceScopeFactory scopeFactory, - ILogger logger) : IJob + ISecurityAuditLog securityAudit) : IJob { public const string Key = "dcr-gc"; public const string Name = "DCR Garbage Collector"; @@ -108,11 +109,15 @@ private async Task SweepRealmAsync(IDocumentStore store, string tenantId, C session.Events.Append(state.Id, aggregate.Delete()); swept++; - var registeredAt = ParseTimestamp(state.Properties, OAuthApplicationPropertyKeys.DcrRegisteredAt); - logger.LogInformation( - "Auth: " + DcrAuditEvents.ClientGarbageCollected + - " ClientId={ClientId} RegisteredAt={RegisteredAt} LastUsedAt={LastUsedAt} TtlDays={TtlDays} Realm={Realm}", - state.ClientId, registeredAt, lastUsedAt, dcr.GcTtlDays, tenantId); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.DcrClientGarbageCollected, + Realm = tenantId, + Level = "Info", + Status = "collected", + Reason = $"clientId {state.ClientId}, ttl {dcr.GcTtlDays}d", + Message = $"DCR client garbage-collected: {state.ClientId}", + }); } if (swept > 0) diff --git a/src/dotnet/Modgud.Api/Features/Admin/Jobs/JobRunHistoryRetentionJob.cs b/src/dotnet/Modgud.Api/Features/Admin/Jobs/JobRunHistoryRetentionJob.cs index 3127f740..af570dc9 
100644 --- a/src/dotnet/Modgud.Api/Features/Admin/Jobs/JobRunHistoryRetentionJob.cs +++ b/src/dotnet/Modgud.Api/Features/Admin/Jobs/JobRunHistoryRetentionJob.cs @@ -80,7 +80,7 @@ public async Task Execute(IJobExecutionContext context) catch (Exception ex) when (ex is not OperationCanceledException) { Serilog.Log.Error(ex, - "Auth: job-run-history-retention failed for realm {Slug}", + "job-run-history-retention failed for realm {Slug}", realm.Slug); } } diff --git a/src/dotnet/Modgud.Api/Features/Admin/Jobs/SecurityAuditPruneJob.cs b/src/dotnet/Modgud.Api/Features/Admin/Jobs/SecurityAuditPruneJob.cs new file mode 100644 index 00000000..0f9e4b10 --- /dev/null +++ b/src/dotnet/Modgud.Api/Features/Admin/Jobs/SecurityAuditPruneJob.cs @@ -0,0 +1,65 @@ +using Marten; +using Microsoft.Extensions.DependencyInjection; +using Quartz; +using Modgud.Infrastructure.Audit; +using Modgud.Infrastructure.Persistence.Tenancy; + +namespace Modgud.Api.Features.Admin.Jobs; + +/// +/// Daily hard-prune of the streamless security/ops audit store +/// (). Replaces the legacy +/// AuthLogPersistenceService cleanup loop with a Quartz job admins can see, +/// re-cron, and trigger from /admin/jobs. +/// +/// The short, FIXED retention window is the GDPR proportionality control +/// for this store: it holds personal data about unidentified actors (attempted +/// identifiers, IPs under CJEU Breyer) processed under Art. 6(1)(f) legitimate +/// interest, with no per-subject erase path — so a genuine hard delete on a tight +/// window keeps the processing proportionate. Deliberately NOT per-realm configurable +/// (unlike the per-realm GDPR-audit visibility window, which is a view bound, +/// not a deletion). See dev-docs/future-features/logging-audit-redesign.md §A.6 +/// + the Legitimate-Interest Assessment. +/// +/// The store is a single cross-realm doc set in the system DB, so this is one +/// indexed delete — no per-realm iteration. 
+/// +[DisallowConcurrentExecution] +public class SecurityAuditPruneJob(IServiceScopeFactory scopeFactory) : IJob +{ + public const string Key = "security-audit-prune"; + public const string Name = "Security Audit Prune"; + public const string Description = + "Hard-deletes streamless security/ops audit entries older than the fixed " + + "short retention window (7 days). This retention is the GDPR proportionality " + + "control for the legitimate-interest data the store holds; deliberately fixed, " + + "not per-realm configurable."; + + /// Fixed short hard-retention for the legitimate-interest streamless store. + /// (The per-realm GDPR-audit visibility window is a separate, configurable concept.) + public static readonly TimeSpan Retention = TimeSpan.FromDays(7); + + /// 02:00 UTC daily. + public const string DefaultCron = "0 0 2 * * ?"; + + public async Task Execute(IJobExecutionContext context) + { + var ct = context.CancellationToken; + + using var scope = scopeFactory.CreateScope(); + var store = scope.ServiceProvider.GetRequiredService(); + + await using var session = store.LightweightSession(TenantConstants.SystemTenantId); + + var cutoff = DateTimeOffset.UtcNow - Retention; + var doomed = await session.Query() + .CountAsync(x => x.Timestamp < cutoff, ct); + + session.DeleteWhere(x => x.Timestamp < cutoff); + await session.SaveChangesAsync(ct); + + context.Result = doomed == 0 + ? 
"No entries to prune" + : $"Pruned {doomed} security-audit entr(ies) older than {Retention.TotalDays:0} day(s)"; + } +} diff --git a/src/dotnet/Modgud.Api/Features/Admin/Jobs/SigningKeyJanitorJob.cs b/src/dotnet/Modgud.Api/Features/Admin/Jobs/SigningKeyJanitorJob.cs index 9e0a7637..3322a227 100644 --- a/src/dotnet/Modgud.Api/Features/Admin/Jobs/SigningKeyJanitorJob.cs +++ b/src/dotnet/Modgud.Api/Features/Admin/Jobs/SigningKeyJanitorJob.cs @@ -3,6 +3,7 @@ using Microsoft.Extensions.Logging; using Quartz; using Modgud.Domain.Realms; +using Modgud.Infrastructure.Audit; using Modgud.Infrastructure.Realms; namespace Modgud.Api.Features.Admin.Jobs; @@ -27,6 +28,7 @@ namespace Modgud.Api.Features.Admin.Jobs; public class SigningKeyJanitorJob( IServiceScopeFactory scopeFactory, IRealmKeyStore keyStore, + ISecurityAuditLog securityAudit, ILogger logger) : IJob { public const string Key = "signing-key-janitor"; @@ -69,9 +71,16 @@ public async Task Execute(IJobExecutionContext context) { realmsTouched++; totalPurged += purged; - logger.LogInformation( - "Auth: signing-key janitor purged {Count} expired retired key(s) for realm {Realm}", - purged, realm.Slug); + // Realm-iterating job: bind the explicit iterated slug. 
+ securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.SigningKeyPurged, + Realm = realm.Slug, + Level = "Info", + Status = "purged", + Reason = $"purged {purged} expired retired key(s)", + Message = $"signing-key janitor purged {purged} expired retired key(s)", + }); } } catch (Exception ex) when (ex is not OperationCanceledException) diff --git a/src/dotnet/Modgud.Api/Features/Admin/ObservabilityHub.cs b/src/dotnet/Modgud.Api/Features/Admin/ObservabilityHub.cs index 0c3137ed..44d8b021 100644 --- a/src/dotnet/Modgud.Api/Features/Admin/ObservabilityHub.cs +++ b/src/dotnet/Modgud.Api/Features/Admin/ObservabilityHub.cs @@ -1,54 +1,152 @@ using System.Reactive.Linq; -using Modgud.Infrastructure.Observability; -using Modgud.Infrastructure.Persistence.Tenancy; +using System.Threading; +using System.Threading.Tasks; using Cocoar.SignalARRR.Common.Attributes; using Cocoar.SignalARRR.Server; +using Microsoft.AspNetCore.Http; +using Microsoft.AspNetCore.SignalR; +using Microsoft.Extensions.DependencyInjection; +using Modgud.Authentication.ExtensionMethods; +using Modgud.Authorization.Apps; +using Modgud.Authorization.Services; +using Modgud.Infrastructure.Observability; +using Modgud.Infrastructure.Persistence.Tenancy; namespace Modgud.Api.Features.Admin; /// -/// SignalARR hub that streams live observability events to subscribed -/// admin clients. Pairs with the REST /api/admin/observability/snapshot -/// + /activity endpoints — the REST surface delivers the initial -/// rolling-window state, this stream pushes every new event as it happens. +/// SignalARR hub that streams live observability data to subscribed admin +/// clients. Two streams: (metered activity events) and +/// (the Phase-5 per-realm live error feed). Both +/// pair with the REST /api/admin/observability/* endpoints — REST +/// delivers the initial rolling-window snapshot, the streams push every new +/// item as it happens. 
/// -/// Realm filtering: is captured -/// at subscription time. Each subscriber sees only their own realm's -/// events. Cross-realm aggregation is Phase 5.5 (see audit-followup -/// doc). +/// Realm filtering: the caller's realm is read from the connection +/// HttpContext.Items (set at connect by RealmMiddleware), NOT from +/// — the ambient tenant AsyncLocal is unset +/// during SignalARR hub dispatch (it unwinds when the negotiate request ends), +/// so it would fall back to system. This mirrors the sibling hubs +/// (UserHub / InboxHub / ServiceAccountHub). Each subscriber sees only their own +/// realm's items. (Control-plane cross-realm aggregation is deferred — the whole +/// observability surface is realm-scoped today, REST included.) /// -/// This hub deliberately does NOT replay history on subscribe — -/// the client already has it from the REST snapshot, replaying would -/// double-count. +/// Permission gating (Phase-5 hardening): SignalARR hubs share the +/// cookie-auth pipeline, and 's class-level [Authorize] +/// enforces authentication — but SignalARR has no per-method authorisation +/// attribute. So each stream method here imperatively checks +/// observability:read (the same permission the REST endpoints gate on) +/// against the caller's realm via ; an +/// unauthorised caller gets an immediately-completed empty stream. The check is +/// performed once at subscribe time (standard for long-lived push channels) and +/// closes the gap the previous revision flagged as a follow-up. /// -/// Permission gating: SignalARR hubs share the cookie-auth pipeline -/// of normal endpoints, but per-method authorisation is not yet on this -/// stack. The hub is callable by any authenticated admin; the realm -/// filter is the effective scope gate. A formal -/// observability:read check belongs on the SignalARR layer as a -/// followup (matches UserHub which has the same limitation today). 
+/// Neither stream replays history on subscribe — the client already has +/// it from the REST snapshot; replaying would double-count. /// [MessageName("Observability")] -public class ObservabilityHub(ObservabilityActivityBuffer buffer) +public class ObservabilityHub(ObservabilityActivityBuffer buffer, RealmErrorBuffer errorBuffer) : ServerMethods { + /// Live metered activity events (login, token, DCR, …) for the caller's realm. public IObservable Subscribe() { - // Capture realm at subscription time — TenantContext is set by - // RealmMiddleware before the SignalARR dispatch runs. - var realm = TenantContext.Current; + var http = Context.GetHttpContext(); + var realm = CallerRealm(http); - return Observable.Create(observer => + return AuthorizedRealmStream(http, realm, onNext => { void Handler(ObservabilityEvent ev) { - if (!string.Equals(ev.Realm, realm, StringComparison.Ordinal)) return; - try { observer.OnNext(ev); } - catch { /* observer disposed mid-flight — handled by IDisposable */ } + if (string.Equals(ev.Realm, realm, StringComparison.Ordinal)) onNext(ev); } buffer.EventRecorded += Handler; return () => buffer.EventRecorded -= Handler; }); } + + /// + /// Live operational error feed (Phase 5, §B.3) for the caller's realm. + /// Pushes every the + /// captures into this realm's bounded ring. + /// + public IObservable LogsSubscribe() + { + var http = Context.GetHttpContext(); + var realm = CallerRealm(http); + + return AuthorizedRealmStream(http, realm, onNext => + { + void Handler(ErrorLogEntry entry) + { + if (string.Equals(entry.Realm, realm, StringComparison.Ordinal)) onNext(entry); + } + + errorBuffer.EntryRecorded += Handler; + return () => errorBuffer.EntryRecorded -= Handler; + }); + } + + /// + /// The caller's realm, read from the connection context (set at connect by + /// RealmMiddleware). Null when no tenant was resolved — callers fail closed. + /// + private static string? CallerRealm(HttpContext? 
http) + => http?.Items[TenantConstants.HttpContextTenantIdKey] as string; + + /// + /// Wraps a buffer subscription in an observability:read permission + /// gate. attaches a handler and returns the + /// detach action; it is wired only once the caller is authorised, and torn + /// down when the client unsubscribes (the stream's cancellation token). + /// + private static IObservable AuthorizedRealmStream( + HttpContext? http, string? realm, Func, Action> subscribe) + { + return Observable.Create(async (observer, ct) => + { + if (!await IsAuthorizedAsync(http, realm)) + { + observer.OnCompleted(); + return; + } + + void OnNext(T item) + { + try { observer.OnNext(item); } + catch { /* observer disposed mid-flight — handled on cancel */ } + } + + var detach = subscribe(OnNext); + try + { + // Keep the subscription alive until the client unsubscribes / + // disconnects, which cancels ct. + await Task.Delay(Timeout.InfiniteTimeSpan, ct); + } + catch (OperationCanceledException) { /* expected on teardown */ } + finally + { + detach(); + } + }); + } + + private static async Task IsAuthorizedAsync(HttpContext? http, string? realm) + { + if (http is null || string.IsNullOrEmpty(realm)) return false; + var userId = http.GetUserId(); + if (userId is null) return false; + + // Hub dispatch runs outside the request's tenant scope, so bind the + // permission lookup to the caller's realm explicitly: the tenant-scoped + // IQuerySession behind IPermissionService resolves TenantContext.Current + // at construction. A fresh DI scope avoids reusing a connection-scoped + // session that may already be bound to another tenant. 
+ using var _ = TenantContext.Enter(realm); + await using var scope = http.RequestServices.CreateAsyncScope(); + var permissions = scope.ServiceProvider.GetRequiredService(); + return await permissions.HasPermissionAsync(userId.Value, AppSlugs.Modgud, "observability:read"); + } } diff --git a/src/dotnet/Modgud.Api/Features/Auth/OAuth/DcrRegistrationEndpoints.cs b/src/dotnet/Modgud.Api/Features/Auth/OAuth/DcrRegistrationEndpoints.cs index e9486b08..236c71cf 100644 --- a/src/dotnet/Modgud.Api/Features/Auth/OAuth/DcrRegistrationEndpoints.cs +++ b/src/dotnet/Modgud.Api/Features/Auth/OAuth/DcrRegistrationEndpoints.cs @@ -2,6 +2,7 @@ using Modgud.Application.Services; using Modgud.Authentication.RealmSettings; using Modgud.Domain.Realms; +using Modgud.Infrastructure.Audit; using Modgud.Infrastructure.Observability; using Microsoft.AspNetCore.Mvc; @@ -50,6 +51,7 @@ private static async Task RegisterAsync( IDcrRegistrationValidator validator, DcrRateLimiter rateLimiter, Serilog.ILogger logger, + ISecurityAuditLog securityAudit, CancellationToken ct) { if (request is null) @@ -71,7 +73,7 @@ private static async Task RegisterAsync( var settings = (await realmSettingsService.LoadAsync(ct)).Dcr ?? new DcrSettings(); if (!settings.Enabled) { - LogRejected(logger, sourceIp, request.ClientName, DcrRejectionReason.RealmDisabled); + LogRejected(securityAudit, sourceIp, request.ClientName, DcrRejectionReason.RealmDisabled); ModgudMeters.RecordDcrRegistration(ModgudMeters.DcrOutcome.PolicyDenied); return Results.NotFound(); } @@ -84,7 +86,7 @@ private static async Task RegisterAsync( var reason = verdict == DcrRateLimitVerdict.PerIpExceeded ? 
DcrRejectionReason.PerIpRateLimit : DcrRejectionReason.PerRealmRateLimit; - LogRateLimit(logger, sourceIp, reason); + LogRateLimit(securityAudit, sourceIp, reason); ModgudMeters.RecordDcrRegistration(ModgudMeters.DcrOutcome.RateLimited); ModgudMeters.RecordDcrRateLimitHit( verdict == DcrRateLimitVerdict.PerIpExceeded @@ -100,7 +102,7 @@ private static async Task RegisterAsync( var validation = validator.Validate(request, settings, sourceIp); if (validation is DcrValidationResult.Reject reject) { - LogRejected(logger, sourceIp, request.ClientName, reject.Reason); + LogRejected(securityAudit, sourceIp, request.ClientName, reject.Reason); ModgudMeters.RecordDcrRegistration(ModgudMeters.DcrOutcome.InvalidRequest); return Results.BadRequest(new DcrErrorResponse { @@ -130,7 +132,7 @@ private static async Task RegisterAsync( // hint instead of a server-error opacity. logger .ForContext("IP", sourceIp) - .Warning("Auth: DCR persist failed — {Reason}", + .Warning("DCR persist failed — {Reason}", createResult.FirstError.Description); ModgudMeters.RecordDcrRegistration(ModgudMeters.DcrOutcome.InvalidRequest); return Results.BadRequest(new DcrErrorResponse @@ -141,12 +143,16 @@ private static async Task RegisterAsync( } var created = createResult.Value.Client; - logger - .ForContext("IP", sourceIp) - .Information( - "Auth: " + DcrAuditEvents.ClientRegistered + - " ClientId={ClientId} Name={ClientName} Realm={Realm}", - created.ClientId, created.DisplayName ?? "(none)", realmSlug); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.DcrClientRegistered, + Level = "Info", + Actor = created.DisplayName, + Ip = sourceIp, + Status = "registered", + Reason = $"clientId {created.ClientId}", + Message = $"DCR client registered: {created.DisplayName ?? 
"(none)"} ({created.ClientId})", + }); ModgudMeters.RecordDcrRegistration(ModgudMeters.DcrOutcome.Success); // ───────── Response ───────── @@ -194,23 +200,29 @@ private static string ResolveRealmSlug(HttpContext ctx) return "(unresolved)"; } - private static void LogRejected(Serilog.ILogger logger, string ip, string? clientName, DcrRejectionReason reason) + private static void LogRejected(ISecurityAuditLog securityAudit, string ip, string? clientName, DcrRejectionReason reason) { - // Audit-log envelope: prefix "Auth: DCR" so the SPA filter chip - // can scope the auth-log grid. Reason is enum-named for stable - // machine parseability ("DcrRegistrationRejected reason=…"). - logger - .ForContext("IP", ip) - .Warning( - "Auth: " + DcrAuditEvents.RegistrationRejected + - " Reason={Reason} ClientName={ClientName}", - reason, clientName ?? "(none)"); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.DcrRegistrationRejected, + Level = "Warning", + Ip = ip, + Status = "rejected", + Reason = $"{reason} clientName={clientName ?? 
"(none)"}", + Message = $"DCR registration rejected: {reason}", + }); } - private static void LogRateLimit(Serilog.ILogger logger, string ip, DcrRejectionReason reason) + private static void LogRateLimit(ISecurityAuditLog securityAudit, string ip, DcrRejectionReason reason) { - logger - .ForContext("IP", ip) - .Warning("Auth: " + DcrAuditEvents.RateLimitTriggered + " Reason={Reason}", reason); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.RateLimitTriggered, + Level = "Warning", + Ip = ip, + Status = "rate_limited", + Reason = reason.ToString(), + Message = $"DCR rate limit triggered: {reason}", + }); } } diff --git a/src/dotnet/Modgud.Api/Features/Inbox/InboxRetentionJob.cs b/src/dotnet/Modgud.Api/Features/Inbox/InboxRetentionJob.cs index 71c525e4..b74f67ef 100644 --- a/src/dotnet/Modgud.Api/Features/Inbox/InboxRetentionJob.cs +++ b/src/dotnet/Modgud.Api/Features/Inbox/InboxRetentionJob.cs @@ -58,7 +58,7 @@ public async Task Execute(IJobExecutionContext context) catch (Exception ex) when (ex is not OperationCanceledException) { Serilog.Log.Error(ex, - "Auth: inbox-retention failed for realm {Slug}", + "inbox-retention failed for realm {Slug}", realm.Slug); } } diff --git a/src/dotnet/Modgud.Api/Modgud.Api.csproj b/src/dotnet/Modgud.Api/Modgud.Api.csproj index b8efe286..6df9498d 100644 --- a/src/dotnet/Modgud.Api/Modgud.Api.csproj +++ b/src/dotnet/Modgud.Api/Modgud.Api.csproj @@ -39,6 +39,8 @@ + + diff --git a/src/dotnet/Modgud.Api/ObservabilitySettings.cs b/src/dotnet/Modgud.Api/ObservabilitySettings.cs index 60ef7e53..c9b95b64 100644 --- a/src/dotnet/Modgud.Api/ObservabilitySettings.cs +++ b/src/dotnet/Modgud.Api/ObservabilitySettings.cs @@ -21,6 +21,7 @@ public class ObservabilitySettings public PrometheusSettings Prometheus { get; set; } = new(); public OtlpSettings Otlp { get; set; } = new(); + public ErrorFeedSettings ErrorFeed { get; set; } = new(); public class PrometheusSettings { @@ -59,12 +60,70 @@ public class OtlpSettings 
/// /// OTLP endpoint. Default points at a local collector on the gRPC port. + /// Uses 127.0.0.1 rather than localhost on purpose: against a + /// plaintext, IPv4-only local collector (e.g. a Docker port map) the SDK + /// exporter can resolve localhost to IPv6 ::1 and hang on + /// connect until the export times out. A real deployment sets its own + /// endpoint (and uses TLS). /// - public string Endpoint { get; set; } = "http://localhost:4317"; + public string Endpoint { get; set; } = "http://127.0.0.1:4317"; /// /// "Grpc" or "HttpProtobuf". Grpc is the canonical OTLP transport. /// public string Protocol { get; set; } = "Grpc"; } + + /// + /// In-app per-realm live error feed (logging/audit redesign Phase 5, §B.3). + /// Local-only — a bounded in-memory buffer + the existing SignalR hub, no + /// external dependency — so it runs independently of the OTLP export + /// (§B.0), behind this flag. + /// + public class ErrorFeedSettings + { + /// + /// Capture qualifying log events into the per-realm error buffer and + /// stream them to the admin observability view. Default on: it is local, + /// bounded, and needs no external infra. Turn off to drop the capture + /// sink entirely (the buffer stays empty and the panel shows nothing). + /// + public bool Enabled { get; set; } = true; + + /// + /// Minimum Serilog level captured. Default Error (Open Decision + /// #7) — the quiet "something broke" feed. Set to Warning to + /// widen it. Parsed case-insensitively; an unparseable value falls back + /// to Error. + /// + /// Effective floor = max(this, Serilog's pipeline floor). + /// The sink sits on the same logger that sets a global + /// MinimumLevel.Information(), so a value below Information + /// captures nothing more — Serilog drops sub-Information events before any + /// sink sees them. To go lower, raise the global minimum too. 
+ /// + public string MinimumLevel { get; set; } = "Error"; + + /// + /// Only loggers whose SourceContext starts with this prefix feed + /// the buffer. Default Modgud (Open Decision #7) — application + /// logs only, framework loggers excluded. Set to "" to capture + /// every source (at the effective level floor). + /// + /// Note: framework loggers (Marten / Npgsql / Wolverine / + /// Microsoft / System) carry per-namespace MinimumLevel.Override(…, + /// Warning) floors, so even with an empty prefix their + /// sub-Warning events never reach this sink. An empty prefix + /// captures framework Warning+ only, unless those overrides are + /// also lowered. + /// + public string SourcePrefix { get; set; } = "Modgud"; + + /// + /// Per-realm ring capacity. Each realm keeps its own independently-capped + /// ring (a noisy realm cannot evict a quiet realm's entries). Total + /// footprint is bounded by realms × this. + /// + public int CapacityPerRealm { get; set; } = 100; + } } diff --git a/src/dotnet/Modgud.Api/Program.cs b/src/dotnet/Modgud.Api/Program.cs index 1ab15c5a..fc701197 100644 --- a/src/dotnet/Modgud.Api/Program.cs +++ b/src/dotnet/Modgud.Api/Program.cs @@ -11,6 +11,7 @@ using Microsoft.AspNetCore.Mvc; using Microsoft.AspNetCore.ResponseCompression; using Serilog; +using Serilog.Sinks.OpenTelemetry; using Serilog.Sinks.SystemConsole.Themes; using BuildingBlocks.EventDispatcher; using Fido2NetLib; @@ -23,7 +24,6 @@ using Modgud.Authentication.Api.Account.Services; using Modgud.Api.Features.Admin; using Modgud.Api.Features.Admin.OAuth; -using Modgud.Authentication.AuthLog; using Modgud.Authentication.Api.Admin; using Modgud.Authentication.Api.Admin.LoginProviders; using Modgud.Authentication.Api.ExternalAuth; @@ -853,10 +853,15 @@ ReferenceSyncRegistration.RegisterAll(opts, typeof(Program).Assembly); }); - // Auth log: Serilog sink → Channel → BackgroundService → Marten (7-day retention) - var authLogSink = new AuthLogSink(); - 
builder.Services.AddSingleton(authLogSink); - builder.Services.AddHostedService(); + // Streamless security/ops audit store (logging/audit redesign Track A, Phase 3). + // Typed best-effort sink (bounded channel) + background writer to the system DB. + // Replaced the legacy "Auth:"-message-prefix Serilog sink (AuthLogSink + + // AuthLogPersistenceService, now deleted). The realm is captured from + // TenantContext.Current at emit; the retention prune is a Quartz job (below). + builder.Services.AddSingleton(); + builder.Services.AddSingleton( + sp => sp.GetRequiredService()); + builder.Services.AddHostedService(); // Quartz-based scheduling framework + the system jobs we host. The DCR // garbage collector was a hand-rolled BackgroundService before Phase 1A; @@ -889,6 +894,11 @@ name: Modgud.Api.Features.Admin.Jobs.SigningKeyJanitorJob.Name, defaultCron: Modgud.Api.Features.Admin.Jobs.SigningKeyJanitorJob.DefaultCron, description: Modgud.Api.Features.Admin.Jobs.SigningKeyJanitorJob.Description); + builder.Services.AddSystemJob( + key: Modgud.Api.Features.Admin.Jobs.SecurityAuditPruneJob.Key, + name: Modgud.Api.Features.Admin.Jobs.SecurityAuditPruneJob.Name, + defaultCron: Modgud.Api.Features.Admin.Jobs.SecurityAuditPruneJob.DefaultCron, + description: Modgud.Api.Features.Admin.Jobs.SecurityAuditPruneJob.Description); // Inbox — per-recipient notifications with SignalR live push. Both // services are scoped (tenant-aware IDocumentSession). The InboxHub @@ -904,6 +914,16 @@ builder.Services.AddScoped(); + // Phase 5 — in-app per-realm live error feed (§B.3). One process-local + // buffer with an independently-capped ring PER realm (not a global ring — + // a noisy realm must not be able to evict a quiet realm's errors). The + // hub (ObservabilityHub.LogsSubscribe) and the /observability/errors + // endpoint read this same singleton; the ErrorFeedSink below feeds it. 
+ var errorFeed = observabilitySettings.ErrorFeed; + var errorFeedBuffer = new Modgud.Infrastructure.Observability.RealmErrorBuffer( + errorFeed.CapacityPerRealm); + builder.Services.AddSingleton(errorFeedBuffer); + builder.Services.AddSerilog(logConfig => { // Global minimum: Information (so Auth: Info events are generated) @@ -930,14 +950,11 @@ logConfig.MinimumLevel.Override("System", Serilog.Events.LogEventLevel.Warning); logConfig.MinimumLevel.Override("Microsoft.Hosting.Lifetime", Serilog.Events.LogEventLevel.Information); - // Stamp every event with the ambient realm slug so the AuthLogSink can - // attribute each persisted "Auth:" entry to its realm (the sink runs - // tenant-less in a BackgroundService, so it must be captured at emit time). + // Stamp every event with the ambient realm slug (RealmLogEnricher). Kept + // after the "Auth:" sink was retired: it is how operational logs carry their + // realm tag for Console/File and for the OTLP log export (Phase 4) below. logConfig.Enrich.With(new Modgud.Authentication.AuthLog.RealmLogEnricher()); - // Auth log sink — captures ALL "Auth:" events (including Info) - logConfig.WriteTo.Sink(authLogSink); - // Console + File logConfig.WriteTo.Console(theme: AnsiConsoleTheme.Code); @@ -948,6 +965,56 @@ logConfig.WriteTo.File(path, rollingInterval: RollingInterval.Day, retainedFileCountLimit: 31); } + + // Phase 4 — OTLP log export. Off by default; shares the metrics/tracing + // OTLP gate + endpoint (Observability__Otlp__Enabled / OtlpSettings), so a + // deployment without a collector/OpenObserve is unaffected (§B.0). Wired as + // a Serilog sink rather than OTel .WithLogs(): AddSerilog runs with + // writeToProviders:false, so an OTel ILoggerProvider would never see the + // Serilog enrichers — in particular the RealmLogEnricher tag that §B.1 + // requires. The sink emits every Serilog property (incl. Realm) as a + // log-record attribute and reads Activity.Current for trace/span + // correlation automatically. 
The redaction GUARANTEE lives at the collector, + // not here; LogPiiMasking stays as belt. Endpoint is a bare base host:port + // for both protocols — the sink derives the per-signal path itself (and + // trims any /v1/logs an operator appends). + // See dev-docs/future-features/logging-audit-redesign.md §B.1-B.2. + if (observabilitySettings.Otlp.Enabled) + { + var otlp = observabilitySettings.Otlp; + logConfig.WriteTo.OpenTelemetry(o => + { + o.Endpoint = otlp.Endpoint; + o.Protocol = otlp.Protocol.Equals("HttpProtobuf", StringComparison.OrdinalIgnoreCase) + ? OtlpProtocol.HttpProtobuf + : OtlpProtocol.Grpc; + o.ResourceAttributes = new Dictionary + { + ["service.name"] = observabilitySettings.ServiceName, + ["service.version"] = System.Reflection.Assembly.GetExecutingAssembly() + .GetName().Version?.ToString() ?? "unknown", + ["service.instance.id"] = Environment.MachineName, + }; + }); + } + + // Phase 5 — in-app per-realm error feed sink (§B.3). Local-only, behind + // its own flag (default on; no external dependency). Captures Error+ + // events from Modgud.* loggers (configurable level/prefix — Open + // Decision #7) into the per-realm RealmErrorBuffer. Sits AFTER the + // RealmLogEnricher above, so each entry carries its realm tag. The + // collector redaction does NOT cover this in-app path — the call-site + // PII belt + per-realm read scoping are the controls (mirrors the + // streamless security store). + if (errorFeed.Enabled) + { + var minimumLevel = + Enum.TryParse(errorFeed.MinimumLevel, ignoreCase: true, out var lvl) + ? 
lvl + : Serilog.Events.LogEventLevel.Error; + logConfig.WriteTo.Sink(new Modgud.Authentication.AuthLog.ErrorFeedSink( + errorFeedBuffer, minimumLevel, errorFeed.SourcePrefix)); + } }); var app = builder.Build(); @@ -1048,6 +1115,7 @@ app.MapStatusEndpoints(); app.MapAuthLogEndpoints("api"); + app.MapAuditEndpoints("api"); app.MapAppSettingsEndpoints("api"); app.MapProjectionEndpoints("api"); app.MapRealmsEndpoints("api"); @@ -1300,7 +1368,7 @@ await Modgud.Infrastructure.Authorization.AppRealmSeeder.SeedAsync( .Where(g => !g.IsDeleted).Take(1).ToListAsync(); await session.Query() .Where(p => !p.IsDeleted).Take(1).ToListAsync(); - await session.Query() + await session.Query() .OrderByDescending(l => l.Timestamp).Take(1).ToListAsync(); await session.Query() .Take(1).ToListAsync(); @@ -1360,6 +1428,13 @@ await Modgud.Infrastructure.Authorization.AppRealmSeeder.SeedAsync( var exitCode = await Modgud.Authentication.Api.Admin.RecoveryCli.RunAsync( app.Services, cliArgs[1..], conf, app.Environment); + // This path never starts the host, so the SecurityAuditWriter background + // drain never runs — flush the recovery CLI's enqueued security-audit + // records to the system DB synchronously before the process exits, or the + // break-glass forensic trail would be lost. + await app.Services.GetRequiredService() + .FlushAsync(app.Services.GetRequiredService()); + if (fromEnv) { Log.Information( @@ -1495,7 +1570,7 @@ static void EnsureCertificateExists( GenerateSelfSignedPfx(path, subject, keyUsage, validYears: 2, keySize: 2048); Log.Warning( - "Auth: auto-generated self-signed {Purpose} certificate at {Path}. " + + "auto-generated self-signed {Purpose} certificate at {Path}. 
" + "This is fine for self-hosted Beta; replace with a managed cert " + "(Key Vault / Secrets Manager / cocoar-secrets generate-cert) before " + "going to public production.", diff --git a/src/dotnet/Modgud.Api/data/configuration.json b/src/dotnet/Modgud.Api/data/configuration.json index b26f221d..caef8f4b 100644 --- a/src/dotnet/Modgud.Api/data/configuration.json +++ b/src/dotnet/Modgud.Api/data/configuration.json @@ -57,8 +57,14 @@ }, "Otlp": { "Enabled": false, - "Endpoint": "http://localhost:4317", + "Endpoint": "http://127.0.0.1:4317", "Protocol": "Grpc" + }, + "ErrorFeed": { + "Enabled": true, + "MinimumLevel": "Error", + "SourcePrefix": "Modgud", + "CapacityPerRealm": 100 } } } diff --git a/src/dotnet/Modgud.Application/DTOs/RealmSettings/AuditSettingsDtos.cs b/src/dotnet/Modgud.Application/DTOs/RealmSettings/AuditSettingsDtos.cs new file mode 100644 index 00000000..dfca6633 --- /dev/null +++ b/src/dotnet/Modgud.Application/DTOs/RealmSettings/AuditSettingsDtos.cs @@ -0,0 +1,18 @@ +namespace Modgud.Application.DTOs.RealmSettings; + +/// Read shape for the tenant-audit sub-section of +/// /api/admin/realm-settings. Defaults are surfaced for realms where the +/// window has never been configured, so the SPA renders the edit form without +/// special-casing a null section. +public record AuditSettingsDto +{ + public int VisibilityWindowDays { get; init; } = 90; +} + +/// Patch payload for the tenant-audit sub-section. Nullable = no change on +/// the wire; non-null = replace. Same partial-PATCH shape as the other +/// sub-sections. +public record UpdateAuditSettingsDto +{ + public int? 
VisibilityWindowDays { get; init; } +} diff --git a/src/dotnet/Modgud.Application/DTOs/RealmSettings/RealmSettingsDtos.cs b/src/dotnet/Modgud.Application/DTOs/RealmSettings/RealmSettingsDtos.cs index ee778633..22ce3400 100644 --- a/src/dotnet/Modgud.Application/DTOs/RealmSettings/RealmSettingsDtos.cs +++ b/src/dotnet/Modgud.Application/DTOs/RealmSettings/RealmSettingsDtos.cs @@ -16,6 +16,7 @@ public record RealmSettingsDto public DcrSettingsDto Dcr { get; init; } = new(); public BrandingSettingsDto Branding { get; init; } = new(); public DeletionSettingsDto Deletion { get; init; } = new(); + public AuditSettingsDto Audit { get; init; } = new(); /// Page-builder schemas keyed by slug. Read-only via the bulk /// GET; writes go through the dedicated /api/admin/customization/pages/{slug} @@ -34,4 +35,5 @@ public record UpdateRealmSettingsDto public UpdateDcrSettingsDto? Dcr { get; init; } public UpdateBrandingSettingsDto? Branding { get; init; } public UpdateDeletionSettingsDto? Deletion { get; init; } + public UpdateAuditSettingsDto? Audit { get; init; } } diff --git a/src/dotnet/Modgud.Application/Dcr/DcrAuditEvents.cs b/src/dotnet/Modgud.Application/Dcr/DcrAuditEvents.cs deleted file mode 100644 index 1331df79..00000000 --- a/src/dotnet/Modgud.Application/Dcr/DcrAuditEvents.cs +++ /dev/null @@ -1,46 +0,0 @@ -namespace Modgud.Application.Dcr; - -/// -/// Canonical event-name strings used in Auth: DCR … log lines. -/// Centralised so both the emitting site (registration endpoint, GC -/// service, token-issue handler) and the consuming site (SPA auth-log -/// grid filter chip) reference the same vocabulary. -/// -/// The auth-log capture path is message-prefix-based (see -/// AuthLogSink); the SPA filters by matching the prefix -/// "DCR " + the event name. Renaming an event name without -/// updating both sides breaks the filter UI — that's why these live -/// here, not as inline literals. 
-/// -/// Reasons (for Rejected / RateLimitTriggered) ride in a -/// {Reason} Serilog property using the -/// enum names — see -/// DcrRegistrationEndpoints.cs for the emission pattern. -/// -public static class DcrAuditEvents -{ - /// Successful registration. Fields: IP, Realm, - /// ClientId, ClientName. - public const string ClientRegistered = "DCR client registered"; - - /// Validation rejected the registration request. Fields: - /// IP, Reason ({Reason}={RejectionReason}), ClientName. - public const string RegistrationRejected = "DCR registration rejected"; - - /// Per-IP or per-realm rate-limit hit. Fields: IP, - /// Reason ({Reason}=PerIpRateLimit | PerRealmRateLimit). - public const string RateLimitTriggered = "DCR rate-limit triggered"; - - /// First successful /connect/authorize invocation - /// for a freshly-registered DCR client. The cleanest signal for - /// "registration was real, not bot-noise". Emitted by the - /// LastUsedAt-update path (lands with the GC infra in a follow-up - /// commit). Fields: ClientId, RegisteredAt. - public const string ClientFirstUsed = "DCR client first used"; - - /// GC sweep soft-deleted a DCR client whose - /// LastUsedAt aged past the per-realm TTL. Fields: ClientId, - /// RegisteredAt, LastUsedAt, TtlDays. Emitted by the GC - /// IHostedService (follow-up commit). 
- public const string ClientGarbageCollected = "DCR client garbage collected"; -} diff --git a/src/dotnet/Modgud.Authentication/Api/Account/AccountEndpoints.cs b/src/dotnet/Modgud.Authentication/Api/Account/AccountEndpoints.cs index 7a6da956..9f351947 100644 --- a/src/dotnet/Modgud.Authentication/Api/Account/AccountEndpoints.cs +++ b/src/dotnet/Modgud.Authentication/Api/Account/AccountEndpoints.cs @@ -13,6 +13,7 @@ using Modgud.Authentication.Sessions; using Modgud.Authorization.Apps; using Modgud.Authorization.Services; +using Modgud.Infrastructure.Audit; using Modgud.Infrastructure.Observability; namespace Modgud.Authentication.Api.Account; @@ -74,6 +75,7 @@ public static WebApplication MapAccountEndpoints(this WebApplication application IDocumentSession docSession, IQuerySession session, ISessionService sessionService, + ISecurityAuditLog securityAudit, HttpContext context) => { var ip = context.Connection.RemoteIpAddress?.ToString() ?? "unknown"; @@ -100,7 +102,16 @@ public static WebApplication MapAccountEndpoints(this WebApplication application // Never reveal whether a username exists, is deactivated, or is locked. if (user is null || !user.IsActive) { - Log.Warning("Auth: Login failed — user not found or inactive. 
UserName={UserName} IP={IP}", request.UserName, ip); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.LoginFailedUnknownUser, + Level = "Warning", + Actor = LogPiiMasking.MaskUsername(request.UserName), + Ip = ip, + Status = "rejected", + Reason = "user not found or inactive", + Message = $"Login failed for {LogPiiMasking.MaskUsername(request.UserName)} — user not found or inactive", + }); ModgudMeters.RecordLogin(ModgudMeters.LoginMethod.Password, ModgudMeters.LoginOutcome.Failure); return Results.Json(new { Message = "Invalid credentials" }, statusCode: 401); } @@ -110,12 +121,29 @@ public static WebApplication MapAccountEndpoints(this WebApplication application if (result.Succeeded) { - Log.Information("Auth: Login successful. User={UserName} IP={IP}", user.UserName, ip); + Log.Information("Login successful. UserId={UserId} IP={IP}", user.Id, ip); ModgudMeters.RecordLogin(ModgudMeters.LoginMethod.Password, ModgudMeters.LoginOutcome.Success); // Track per-user device session (best-effort). await SessionTracker.RecordLoginAsync(sessionService, context, user.Id); + // Audit marker on the user's stream (Phase 1): the "when + by what + // method" of a successful login. No IP on the event — IP/device live + // in the Sessions feature (RecordLoginAsync above). Erasable with the + // user. Best-effort: PasswordSignInAsync has already issued the auth + // cookie, so a failed marker write must NOT turn a successful login + // into a 500 — log and continue (mirrors SessionTracker's contract). 
+ try + { + docSession.Events.Append(user.Id, new Modgud.Authentication.Events.UserLoggedInEvent( + user.Id, IpAddress: null, Method: ModgudMeters.LoginMethod.Password)); + await docSession.SaveChangesAsync(context.RequestAborted); + } + catch (Exception ex) + { + Log.Warning(ex, "failed to persist login audit marker for user {UserId}", user.Id); + } + // Level >= 1: check if user needs to set up a secure login method if (appSettings.AuthenticationMinimumLevel >= 1) { @@ -128,14 +156,14 @@ public static WebApplication MapAccountEndpoints(this WebApplication application // EventSourcedUserStore creates UserSecurityData on first password change. // Fall back to blocking setup if the document is missing — caller can // set up 2FA which will create the document. - Log.Information("Auth: User requires secure setup (no security data). User={UserName} IP={IP}", user.UserName, ip); + Log.Information("User requires secure setup (no security data). UserId={UserId} IP={IP}", user.Id, ip); return Results.Ok(new { RequiresSecureSetup = true, GracePeriod = false }); } // Hard opt-out: treat as if 2FA is set up. Audit-log every occurrence. if (securityData.TwoFactorExempt) { - Log.Warning("Auth: 2FA-exempt login. User={UserName} IP={IP}", user.UserName, ip); + Log.Warning("2FA-exempt login. UserId={UserId} IP={IP}", user.Id, ip); return Results.Ok(new { Message = "Login successful" }); } @@ -149,13 +177,13 @@ public static WebApplication MapAccountEndpoints(this WebApplication application securityData.SecureSetupDueAt = DateTime.UtcNow.AddDays(graceDays); docSession.Store(securityData); await docSession.SaveChangesAsync(); - Log.Information("Auth: Grace period started. User={UserName} DueAt={DueAt} IP={IP}", - user.UserName, securityData.SecureSetupDueAt, ip); + Log.Information("Grace period started. 
UserId={UserId} DueAt={DueAt} IP={IP}", + user.Id, securityData.SecureSetupDueAt, ip); } var inGrace = securityData.SecureSetupDueAt is { } due && due > DateTime.UtcNow; - Log.Information("Auth: User requires secure setup. User={UserName} InGrace={InGrace} DueAt={DueAt} IP={IP}", - user.UserName, inGrace, securityData.SecureSetupDueAt, ip); + Log.Information("User requires secure setup. UserId={UserId} InGrace={InGrace} DueAt={DueAt} IP={IP}", + user.Id, inGrace, securityData.SecureSetupDueAt, ip); return Results.Ok(new { RequiresSecureSetup = true, @@ -170,7 +198,7 @@ public static WebApplication MapAccountEndpoints(this WebApplication application if (result.RequiresTwoFactor) { - Log.Information("Auth: Login requires MFA. User={UserName} IP={IP}", user.UserName, ip); + Log.Information("Login requires MFA. UserId={UserId} IP={IP}", user.Id, ip); ModgudMeters.RecordLogin(ModgudMeters.LoginMethod.Password, ModgudMeters.LoginOutcome.TwoFactorRequired); var mfaMethods = new List(); if (user.TwoFactorEnabled) mfaMethods.Add("totp"); @@ -180,12 +208,12 @@ public static WebApplication MapAccountEndpoints(this WebApplication application if (result.IsLockedOut) { - Log.Warning("Auth: Login failed — account locked. User={UserName} IP={IP}", user.UserName, ip); + Log.Warning("Login failed — account locked. UserId={UserId} IP={IP}", user.Id, ip); ModgudMeters.RecordLogin(ModgudMeters.LoginMethod.Password, ModgudMeters.LoginOutcome.Locked); } else { - Log.Warning("Auth: Login failed — wrong password. User={UserName} IP={IP}", user.UserName, ip); + Log.Warning("Login failed — wrong password. 
UserId={UserId} IP={IP}", user.Id, ip); ModgudMeters.RecordLogin(ModgudMeters.LoginMethod.Password, ModgudMeters.LoginOutcome.Failure); } @@ -296,12 +324,12 @@ public static WebApplication MapAccountEndpoints(this WebApplication application var result = await userManager.ChangePasswordAsync(user, request.CurrentPassword, request.NewPassword); if (!result.Succeeded) { - Log.Warning("Auth: Change password failed. User={UserName} IP={IP}", user.UserName, ip); + Log.Warning("Change password failed. UserId={UserId} IP={IP}", user.Id, ip); var errors = result.Errors.Select(e => e.Description).ToList(); return Results.Json(new { Message = string.Join(" ", errors) }, statusCode: 400); } - Log.Information("Auth: Password changed. User={UserName} IP={IP}", user.UserName, ip); + Log.Information("Password changed. UserId={UserId} IP={IP}", user.Id, ip); return Results.Ok(new { Message = "Password changed successfully" }); }) .WithName("Account_ChangePassword"); diff --git a/src/dotnet/Modgud.Authentication/Api/Account/BootstrapEndpoints.cs b/src/dotnet/Modgud.Authentication/Api/Account/BootstrapEndpoints.cs index 4316f419..8f8f1edd 100644 --- a/src/dotnet/Modgud.Authentication/Api/Account/BootstrapEndpoints.cs +++ b/src/dotnet/Modgud.Authentication/Api/Account/BootstrapEndpoints.cs @@ -2,6 +2,7 @@ using Modgud.Authentication.Identity; using Modgud.Authentication.Sessions; using Modgud.Authentication.Setup; +using Modgud.Infrastructure.Audit; using Microsoft.AspNetCore.Identity; namespace Modgud.Authentication.Api.Account; @@ -42,16 +43,23 @@ public static WebApplication MapBootstrapEndpoints(this WebApplication app, stri IPendingAdminInviteService inviteService, UserManager userManager, SignInManager signInManager, - ISessionService sessionService) => + ISessionService sessionService, + ISecurityAuditLog securityAudit) => { var ip = http.Connection.RemoteIpAddress?.ToString() ?? 
"unknown"; var result = await inviteService.ConsumeAsync(request.Token, request.Password); if (result.IsError) { - Serilog.Log.Information( - "Auth: Bootstrap-invite consume rejected. IP={IP} Code={Code}", - ip, result.FirstError.Code); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.BootstrapInviteRejected, + Level = "Warning", + Ip = ip, + Status = "rejected", + Reason = "invalid or expired invite", + Message = "Bootstrap invite consume rejected", + }); return Results.Problem( statusCode: StatusCodes.Status400BadRequest, title: result.FirstError.Code, @@ -70,8 +78,8 @@ public static WebApplication MapBootstrapEndpoints(this WebApplication app, stri } Serilog.Log.Warning( - "Auth: Bootstrap admin created via invite. IP={IP} UserName={UserName}", - ip, result.Value.UserName); + "Bootstrap admin created via invite. IP={IP} UserId={UserId}", + ip, result.Value.UserId); return Results.Ok(new { Message = "Bootstrap successful", UserName = result.Value.UserName }); }) diff --git a/src/dotnet/Modgud.Authentication/Api/Account/EmailVerificationEndpoints.cs b/src/dotnet/Modgud.Authentication/Api/Account/EmailVerificationEndpoints.cs index cd170b18..ebd8290e 100644 --- a/src/dotnet/Modgud.Authentication/Api/Account/EmailVerificationEndpoints.cs +++ b/src/dotnet/Modgud.Authentication/Api/Account/EmailVerificationEndpoints.cs @@ -165,7 +165,7 @@ await emailService.SendTemplatedEmailAsync( session.Delete(challenge); await session.SaveChangesAsync(); - Serilog.Log.Information("EmailVerification: confirmed user {UserName}", user.UserName); + Serilog.Log.Information("EmailVerification: confirmed user {UserId}", user.Id); return Results.Ok(new { Message = "Email verified" }); }) .WithName("EmailVerification_Consume"); diff --git a/src/dotnet/Modgud.Authentication/Api/Account/MagicLinkEndpoints.cs b/src/dotnet/Modgud.Authentication/Api/Account/MagicLinkEndpoints.cs index 5e2b2dc3..ca3f704b 100644 --- 
a/src/dotnet/Modgud.Authentication/Api/Account/MagicLinkEndpoints.cs +++ b/src/dotnet/Modgud.Authentication/Api/Account/MagicLinkEndpoints.cs @@ -6,6 +6,7 @@ using Modgud.Authentication.Domain; using Modgud.Authentication; using Modgud.Authentication.Sessions; +using Modgud.Infrastructure.Audit; using Modgud.Infrastructure.Email; using Modgud.Infrastructure.Observability; @@ -131,6 +132,7 @@ await emailService.SendTemplatedEmailAsync( IDocumentSession session, SignInManager signInManager, ISessionService sessionService, + ISecurityAuditLog securityAudit, HttpContext context) => { if (string.IsNullOrWhiteSpace(request.Token)) @@ -146,7 +148,15 @@ await emailService.SendTemplatedEmailAsync( if (challenge is null || challenge.IsExpired) { - Serilog.Log.Warning("Auth: Magic link login failed — invalid/expired token. IP={IP}", ip); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.MagicLinkInvalid, + Level = "Warning", + Ip = ip, + Status = "rejected", + Reason = "invalid or expired token", + Message = "Magic-link login failed — invalid or expired token", + }); ModgudMeters.RecordLogin(ModgudMeters.LoginMethod.MagicLink, ModgudMeters.LoginOutcome.Failure); if (challenge is not null) { session.Delete(challenge); await session.SaveChangesAsync(); } return Results.Json(new { Message = "Invalid or expired link" }, statusCode: 401); @@ -156,7 +166,15 @@ await emailService.SendTemplatedEmailAsync( var user = await session.LoadAsync(request.UserId); if (user is null || user.IsDeleted || !user.IsActive) { - Serilog.Log.Warning("Auth: Magic link login failed — user not found/inactive. 
IP={IP}", ip); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.LoginFailedUnknownUser, + Level = "Warning", + Ip = ip, + Status = "rejected", + Reason = "user not found or inactive", + Message = "Magic-link login failed — user not found or inactive", + }); ModgudMeters.RecordLogin(ModgudMeters.LoginMethod.MagicLink, ModgudMeters.LoginOutcome.Failure); session.Delete(challenge); await session.SaveChangesAsync(); @@ -186,6 +204,11 @@ await emailService.SendTemplatedEmailAsync( Email: default)); } + // Audit marker — magic-link login success (Phase 1). No IP on the event + // (the Sessions feature owns IP/device); rides the same transaction. + session.Events.Append(user.Id, new Modgud.Authentication.Events.UserLoggedInEvent( + user.Id, IpAddress: null, Method: ModgudMeters.LoginMethod.MagicLink)); + // Delete challenge (one-time use) session.Delete(challenge); await session.SaveChangesAsync(); @@ -196,7 +219,7 @@ await emailService.SendTemplatedEmailAsync( await SessionTracker.RecordLoginAsync(sessionService, context, user.Id); - Serilog.Log.Information("Auth: Magic link login successful. User={UserName} IP={IP}", user.UserName, ip); + Serilog.Log.Information("Magic link login successful. 
UserId={UserId} IP={IP}", user.Id, ip); ModgudMeters.RecordLogin(ModgudMeters.LoginMethod.MagicLink, ModgudMeters.LoginOutcome.Success); return Results.Ok(new { Message = "Login successful" }); }) diff --git a/src/dotnet/Modgud.Authentication/Api/Account/MfaEndpoints.cs b/src/dotnet/Modgud.Authentication/Api/Account/MfaEndpoints.cs index 626db0e7..e7f8d946 100644 --- a/src/dotnet/Modgud.Authentication/Api/Account/MfaEndpoints.cs +++ b/src/dotnet/Modgud.Authentication/Api/Account/MfaEndpoints.cs @@ -155,12 +155,12 @@ public static WebApplication MapMfaEndpoints(this WebApplication application, st if (twoFactorUser is not null) await SessionTracker.RecordLoginAsync(sessionService, context, twoFactorUser.Id); - Serilog.Log.Information("Auth: MFA login successful. IP={IP}", ip); + Serilog.Log.Information("MFA login successful. IP={IP}", ip); ModgudMeters.RecordLogin(ModgudMeters.LoginMethod.Mfa, ModgudMeters.LoginOutcome.Success); return Results.Ok(new { Message = "Login successful" }); } - Serilog.Log.Warning("Auth: MFA login failed — invalid code. IP={IP} Locked={Locked}", ip, result.IsLockedOut); + Serilog.Log.Warning("MFA login failed — invalid code. IP={IP} Locked={Locked}", ip, result.IsLockedOut); if (result.IsLockedOut) { diff --git a/src/dotnet/Modgud.Authentication/Api/Account/PasskeyEndpoints.cs b/src/dotnet/Modgud.Authentication/Api/Account/PasskeyEndpoints.cs index 1637484c..7ea53dde 100644 --- a/src/dotnet/Modgud.Authentication/Api/Account/PasskeyEndpoints.cs +++ b/src/dotnet/Modgud.Authentication/Api/Account/PasskeyEndpoints.cs @@ -321,7 +321,7 @@ public static WebApplication MapPasskeyEndpoints(this WebApplication application await SessionTracker.RecordLoginAsync(sessionService, context, user.Id); var ip = context.Connection.RemoteIpAddress?.ToString() ?? "unknown"; - Serilog.Log.Information("Auth: Passkey login successful. User={UserName} IP={IP}", user.UserName, ip); + Serilog.Log.Information("Passkey login successful. 
UserId={UserId} IP={IP}", user.Id, ip); ModgudMeters.RecordLogin(ModgudMeters.LoginMethod.Passkey, ModgudMeters.LoginOutcome.Success); return Results.Ok(new { Message = "Login successful" }); diff --git a/src/dotnet/Modgud.Authentication/Api/Account/ProfileEndpoints.cs b/src/dotnet/Modgud.Authentication/Api/Account/ProfileEndpoints.cs index b290f2ee..f0dfe27d 100644 --- a/src/dotnet/Modgud.Authentication/Api/Account/ProfileEndpoints.cs +++ b/src/dotnet/Modgud.Authentication/Api/Account/ProfileEndpoints.cs @@ -209,8 +209,8 @@ await emailService.SendTemplatedEmailAsync(pending.Email.Value!, EmailTemplate.E } } - Log.Information("Profile: Change request upserted. User={UserName} Status={Status}", - user.UserName, request.Status); + Log.Information("Profile: Change request upserted. UserId={UserId} Status={Status}", + user.Id, request.Status); return Results.Ok(new { Open = MapForApi(request, user) }); }); @@ -311,7 +311,7 @@ await emailService.SendTemplatedEmailAsync(recipients, if (adminItemIds.Count > 0) await inboxNotifier.DismissByIdsAsync(adminItemIds); - Log.Information("Profile: Change request cancelled. User={UserName}", user.UserName); + Log.Information("Profile: Change request cancelled. UserId={UserId}", user.Id); return Results.NoContent(); }); diff --git a/src/dotnet/Modgud.Authentication/Api/Account/Services/AppSignInManager.cs b/src/dotnet/Modgud.Authentication/Api/Account/Services/AppSignInManager.cs index d7097ee5..8bba0b14 100644 --- a/src/dotnet/Modgud.Authentication/Api/Account/Services/AppSignInManager.cs +++ b/src/dotnet/Modgud.Authentication/Api/Account/Services/AppSignInManager.cs @@ -46,8 +46,8 @@ public override async Task SignInWithClaimsAsync( if (securityData?.TwoFactorExempt == true) { Serilog.Log.Warning( - "Auth: 2FA-exempt user signed in. User={UserName}", - user.UserName); + "2FA-exempt user signed in. 
UserId={UserId}", + user.Id); } await base.SignInWithClaimsAsync(user, authenticationProperties, additionalClaims); diff --git a/src/dotnet/Modgud.Authentication/Api/Account/TwoFactorEnforcementMiddleware.cs b/src/dotnet/Modgud.Authentication/Api/Account/TwoFactorEnforcementMiddleware.cs index 7172677b..8bb574a5 100644 --- a/src/dotnet/Modgud.Authentication/Api/Account/TwoFactorEnforcementMiddleware.cs +++ b/src/dotnet/Modgud.Authentication/Api/Account/TwoFactorEnforcementMiddleware.cs @@ -135,8 +135,8 @@ public async Task InvokeAsync( session.Store(securityData); await session.SaveChangesAsync(); Serilog.Log.Information( - "Auth: Grace period lazy-stamped from middleware. User={UserName} DueAt={DueAt}", - user.UserName, securityData.SecureSetupDueAt); + "Grace period lazy-stamped from middleware. UserId={UserId} DueAt={DueAt}", + user.Id, securityData.SecureSetupDueAt); await next(context); return; } @@ -150,8 +150,8 @@ public async Task InvokeAsync( // No grace left — block. Serilog.Log.Warning( - "Auth: 2FA enforcement blocked request. User={UserName} Path={Path}", - user.UserName, path); + "2FA enforcement blocked request. UserId={UserId} Path={Path}", + user.Id, path); ModgudMeters.RecordTwoFactorBlocked(); context.Response.StatusCode = StatusCodes.Status403Forbidden; await context.Response.WriteAsJsonAsync(new diff --git a/src/dotnet/Modgud.Authentication/Api/Admin/AdminGraceEndpoints.cs b/src/dotnet/Modgud.Authentication/Api/Admin/AdminGraceEndpoints.cs index a4a5197e..a24d0877 100644 --- a/src/dotnet/Modgud.Authentication/Api/Admin/AdminGraceEndpoints.cs +++ b/src/dotnet/Modgud.Authentication/Api/Admin/AdminGraceEndpoints.cs @@ -85,8 +85,8 @@ public static WebApplication MapAdminGraceEndpoints(this WebApplication applicat await session.SaveChangesAsync(); Serilog.Log.Information( - "Admin: Grace policy updated. 
User={UserName} Override={Override} Exempt={Exempt}", - user.UserName, securityData.GracePeriodDaysOverride, securityData.TwoFactorExempt); + "Admin: Grace policy updated. UserId={UserId} Override={Override} Exempt={Exempt}", + user.Id, securityData.GracePeriodDaysOverride, securityData.TwoFactorExempt); return Results.Ok(new { securityData.GracePeriodDaysOverride, @@ -120,8 +120,8 @@ public static WebApplication MapAdminGraceEndpoints(this WebApplication applicat session.Store(securityData); await session.SaveChangesAsync(); - Serilog.Log.Information("Admin: Grace period reset. User={UserName} DueAt={DueAt}", - user.UserName, securityData.SecureSetupDueAt); + Serilog.Log.Information("Admin: Grace period reset. UserId={UserId} DueAt={DueAt}", + user.Id, securityData.SecureSetupDueAt); return Results.Ok(new { SecureSetupDueAt = securityData.SecureSetupDueAt }); }) .WithName("Admin_ResetGracePeriod"); @@ -145,7 +145,7 @@ public static WebApplication MapAdminGraceEndpoints(this WebApplication applicat session.Store(securityData); await session.SaveChangesAsync(); - Serilog.Log.Information("Admin: Grace period expired immediately. User={UserName}", user.UserName); + Serilog.Log.Information("Admin: Grace period expired immediately. 
UserId={UserId}", user.Id); return Results.NoContent(); }) .WithName("Admin_ClearGracePeriod"); diff --git a/src/dotnet/Modgud.Authentication/Api/Admin/AdminMagicLinkEndpoints.cs b/src/dotnet/Modgud.Authentication/Api/Admin/AdminMagicLinkEndpoints.cs index 3455fc7f..6bceba1d 100644 --- a/src/dotnet/Modgud.Authentication/Api/Admin/AdminMagicLinkEndpoints.cs +++ b/src/dotnet/Modgud.Authentication/Api/Admin/AdminMagicLinkEndpoints.cs @@ -5,6 +5,7 @@ using Modgud.Authorization.AspNetCore; using Modgud.Authentication; using Modgud.Authentication.Domain; +using Modgud.Authentication.Identity; using Modgud.Infrastructure.Email; namespace Modgud.Authentication.Api.Admin; @@ -78,7 +79,7 @@ await emailService.SendTemplatedEmailAsync( ["ExpirationMinutes"] = config.ExpirationMinutes.ToString(), }); - Serilog.Log.Information("Admin: Magic link sent to {UserName} ({Email})", user.UserName, user.Email); + Serilog.Log.Information("Admin: Magic link sent to {UserId} ({MaskedEmail})", user.Id, LogPiiMasking.MaskEmail(user.Email)); return Results.Ok(new { Message = "Magic link sent" }); }) .WithName("Admin_SendMagicLink"); diff --git a/src/dotnet/Modgud.Authentication/Api/Admin/AuditEndpoints.cs b/src/dotnet/Modgud.Authentication/Api/Admin/AuditEndpoints.cs new file mode 100644 index 00000000..588711f5 --- /dev/null +++ b/src/dotnet/Modgud.Authentication/Api/Admin/AuditEndpoints.cs @@ -0,0 +1,94 @@ +using Marten; +using Modgud.Authentication.Audit; +using Modgud.Authentication.Domain; +using Modgud.Authentication.RealmSettings; +using Modgud.Authorization.AspNetCore; +using Modgud.Domain.Realms; + +namespace Modgud.Authentication.Api.Admin; + +/// +/// Tenant audit read surface (logging/audit redesign Track A — the GDPR-audit half). +/// +/// Unlike the legacy AuthLog (cross-realm in the system DB, scoped at +/// read via ScopeToCallerRealm), lives +/// per-realm in the tenant DB. 
So the tenant-scoped +/// returns only the caller's realm by physical isolation — no +/// WHERE Realm = filter is needed and a filter bug cannot leak cross-realm. +/// Control-plane cross-realm fan-out across realm DBs is deferred; the platform-wide +/// surface is the streamless security store (Phase 3). +/// +public static class AuditEndpoints +{ + public static WebApplication MapAuditEndpoints(this WebApplication application, string path) + { + var group = application.MapGroup($"{path}/admin/audit") + .WithTags("Admin Audit") + .RequireAuthorization(); + + group.MapGet("", async ( + IDocumentSession session, + IRealmSettingsService realmSettings, + string? category, + string? eventType, + int? limit, + CancellationToken ct) => + { + // Visibility window (§A.6): show only the last N days. A *view* bound, + // not a deletion — older rows stay on the (masked-on-erase) streams. + var window = (await realmSettings.LoadAsync(ct)).Audit ?? AuditSettings.Defaults; + var cutoff = DateTimeOffset.UtcNow.AddDays(-window.VisibilityWindowDays); + + // Tenant-scoped session → only the caller's realm (per-realm DB). + IQueryable query = session.Query() + .Where(x => x.Timestamp >= cutoff); + if (!string.IsNullOrWhiteSpace(category)) + query = query.Where(x => x.Category == category); + if (!string.IsNullOrWhiteSpace(eventType)) + query = query.Where(x => x.EventType == eventType); + + var rows = await query + .OrderByDescending(x => x.Timestamp) + .Take(Math.Clamp(limit ?? 200, 1, 1000)) + .ToListAsync(ct); + + // Resolve the actor's identity at read time by joining to ApplicationUser + // (NOT the UserView projection) on purpose: GdprService masks the + // ApplicationUser doc IN PLACE on erase (UserName -> "deleted-{guid}", + // name/email nulled, GdprService.cs:230-243), so an erased user shows up + // de-identified here for free. UserView keeps the stale real name until a + // rebuild and would leak it. 
+ var userIds = rows.Where(r => r.UserId.HasValue).Select(r => r.UserId!.Value).Distinct().ToList(); + var names = userIds.Count == 0 + ? new Dictionary() + : (await session.Query().Where(u => userIds.Contains(u.Id)).ToListAsync(ct)) + .ToDictionary(u => u.Id, u => (string?)u.UserName); + + var dtos = rows.Select(r => new AuditLogEntryDto( + r.Timestamp, r.Realm, r.Category, r.EventType, + r.UserId is { } uid && names.TryGetValue(uid, out var name) ? name : null, + r.Ip, r.Method, r.Count, r.Level)); + + return Results.Ok(dtos); + }) + .WithName("AdminAudit_Get") + .RequiresPermission("audit-log:read"); + + return application; + } +} + +/// Read DTO for the tenant audit grid: the row +/// plus the actor's display identity () resolved at read time from +/// the erasure-masked ApplicationUser doc — so an erased user reads as +/// deleted-{guid}, never their real name. +public sealed record AuditLogEntryDto( + DateTimeOffset Timestamp, + string? Realm, + string Category, + string EventType, + string? User, + string? Ip, + string? Method, + int? Count, + string Level); diff --git a/src/dotnet/Modgud.Authentication/Api/Admin/AuthLogEndpoints.cs b/src/dotnet/Modgud.Authentication/Api/Admin/AuthLogEndpoints.cs index a8ebf4b6..7291e91d 100644 --- a/src/dotnet/Modgud.Authentication/Api/Admin/AuthLogEndpoints.cs +++ b/src/dotnet/Modgud.Authentication/Api/Admin/AuthLogEndpoints.cs @@ -1,66 +1,105 @@ +using System.Security.Claims; using Marten; using Microsoft.AspNetCore.Http; using Modgud.Authorization.AspNetCore; -using Modgud.Authentication.AuthLog; +using Modgud.Infrastructure.Audit; using Modgud.Infrastructure.Persistence.Tenancy; using Modgud.Infrastructure.Realms; namespace Modgud.Authentication.Api.Admin; /// -/// Admin auth-log surface. Entries are persisted to the system DB (a single -/// cross-realm audit store) but attributed to a realm via -/// AuthLogDocument.Realm. 
The read/clear here scope by the CALLER'S realm -/// so a tenant realm-admin sees and clears only their own realm's events; the -/// control-plane realm (the cross-realm operator, per -/// Realm.IsControlPlane) sees and can clear the full cross-realm log. +/// Admin Security log surface (logging/audit redesign Track A — the streamless +/// half). Reads the typed store: unknown-actor login +/// attempts, probes, rate-limits, policy rejections, and operational actions. Entries +/// live cross-realm in the system DB but are attributed to a realm via +/// . /// -/// The control-plane check reads the request's resolved -/// (IsControlPlane), NOT a hard-coded "system" -/// slug — so the global view follows the control-plane role if it is ever -/// transferred to another realm, and a realm that merely happens to be named -/// "system" but no longer holds the role cannot see other realms' events. +/// The read/clear scope by the CALLER'S realm so a tenant realm-admin sees and +/// clears only their own realm's tenant-visible events; the control-plane +/// realm (per TenantInfo.IsControlPlane, not a hard-coded "system" slug) sees and +/// clears the full cross-realm log including control-plane-only operational rows +/// (). This carries PR #50's scoping forward +/// and extends it with the platform-only visibility gate. /// -/// Without this the read used a tenant-scoped session against the caller's -/// (empty) tenant DB, so non-system realm-admins saw nothing while the system -/// view commingled every realm. +/// The HTTP surface (route, shape) is carried forward from the legacy AuthLog so +/// the SPA keeps working; the backing store changed from the flat AuthLogDocument to the +/// typed SecurityAuditEntry. 
/// public static class AuthLogEndpoints { public static WebApplication MapAuthLogEndpoints(this WebApplication application, string path) { var group = application.MapGroup($"{path}/admin/auth-log") - .WithTags("Admin Auth Log") + .WithTags("Admin Security Log") .RequireAuthorization(); - group.MapGet("", async (IDocumentStore store, HttpContext http, int? limit) => + group.MapGet("", async ( + IDocumentStore store, + HttpContext http, + string? category, + string? eventType, + int? limit) => { await using var session = store.QuerySession(TenantConstants.SystemTenantId); - var entries = await ScopeToCallerRealm( - session.Query<AuthLogDocument>(), TenantContext.Current, IsControlPlane(http)) + + var query = ScopeToCallerRealm( + session.Query<SecurityAuditEntry>(), TenantContext.Current, IsControlPlane(http)); + if (!string.IsNullOrWhiteSpace(category)) + query = query.Where(x => x.Category == category); + if (!string.IsNullOrWhiteSpace(eventType)) + query = query.Where(x => x.EventType == eventType); + + var rows = await query .OrderByDescending(x => x.Timestamp) - .Take(limit ?? 200) + .Take(Math.Clamp(limit ?? 200, 1, 1000)) .ToListAsync(); - return Results.Ok(entries); + // Carry-forward DTO: the legacy grid columns (Timestamp/Level/Message/ + // UserName/Ip/Realm) keep their names — Actor maps to UserName — plus the + // new EventType/Category for taxonomy-chip filtering and Status/Reason. + var dtos = rows.Select(r => new SecurityLogEntryDto( + r.Timestamp, r.Realm, r.Category, r.EventType, r.Level, + r.Actor, r.Ip, r.Status, r.Reason, r.Message)); + + return Results.Ok(dtos); }) .WithName("AdminAuthLog_Get") .RequiresPermission("auth-log:read"); - // Clearing the auth log is destructive — gate behind the global app:admin - // bypass. (We deliberately don't add an `auth-log:write` since the only - // write op is wipe-all.) Scoped to the caller's realm; the control-plane - // realm wipes the full log. 
- group.MapDelete("", async (IDocumentStore store, HttpContext http) => + // Clearing the security log is destructive — gate behind the global app:admin + // bypass. A tenant clears only its own realm's tenant-visible (!PlatformOnly) rows, i.e. exactly what the read shows it; the control-plane realm wipes the full + // log. The clear is itself audited (audit-of-the-audit): a typed + // audit.log_cleared record naming the operator is emitted AFTER the wipe, so it + // survives as the forensic trail of who cleared what, when. + group.MapDelete("", async ( + IDocumentStore store, + HttpContext http, + ClaimsPrincipal user, + ISecurityAuditLog securityAudit) => { var callerRealm = TenantContext.Current; + var isControlPlane = IsControlPlane(http); + await using var session = store.LightweightSession(TenantConstants.SystemTenantId); - if (IsControlPlane(http)) - session.DeleteWhere<AuthLogDocument>(x => true); + if (isControlPlane) + session.DeleteWhere<SecurityAuditEntry>(x => true); else - session.DeleteWhere<AuthLogDocument>(x => x.Realm == callerRealm); + session.DeleteWhere<SecurityAuditEntry>(x => x.Realm == callerRealm && !x.PlatformOnly); await session.SaveChangesAsync(); - return Results.Ok(new { Message = "Auth log cleared" }); + + var operatorName = user.Identity?.Name ?? "(unknown)"; + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.AuditLogCleared, + Level = "Warning", + Actor = operatorName, + Status = "cleared", + Reason = isControlPlane ? "all realms (control-plane)" : $"realm {callerRealm}", + Message = $"Security log cleared by {operatorName}", + }); + + return Results.Ok(new { Message = "Security log cleared" }); }) .WithName("AdminAuthLog_Clear") .RequiresPermission("realm:admin"); @@ -72,14 +111,31 @@ private static bool IsControlPlane(HttpContext http) => http.Items[TenantConstants.HttpContextTenantInfoKey] is TenantInfo info && info.IsControlPlane; /// - /// Realm-scopes an auth-log query: the control-plane realm sees the full - /// cross-realm log; every other realm sees only its own entries. 
Pure + - /// provider-agnostic so it composes over either Marten's IQueryable or an - /// in-memory one (used by the unit tests). + /// Realm-scopes a security-log query: the control-plane realm sees the full + /// cross-realm log (including control-plane-only operational rows); every other + /// realm sees only its own realm's tenant-visible entries + /// (!PlatformOnly). Pure + provider-agnostic so it composes over either + /// Marten's IQueryable or an in-memory one (used by the unit tests). /// - public static IQueryable<AuthLogDocument> ScopeToCallerRealm( - IQueryable<AuthLogDocument> query, string callerRealm, bool callerIsControlPlane) + public static IQueryable<SecurityAuditEntry> ScopeToCallerRealm( + IQueryable<SecurityAuditEntry> query, string callerRealm, bool callerIsControlPlane) => callerIsControlPlane ? query - : query.Where(x => x.Realm == callerRealm); + : query.Where(x => x.Realm == callerRealm && !x.PlatformOnly); } + +/// Read DTO for the Security log grid. Carries the legacy column names +/// (<see cref="UserName"/> = the entry's Actor) so the existing SPA keeps +/// working, plus the typed <see cref="EventType"/> / <see cref="Category"/> for chip +/// filtering and <see cref="Status"/> / <see cref="Reason"/> detail. +public sealed record SecurityLogEntryDto( + DateTimeOffset Timestamp, + string? Realm, + string Category, + string EventType, + string Level, + string? UserName, + string? Ip, + string? Status, + string? 
Reason, + string Message); diff --git a/src/dotnet/Modgud.Authentication/Api/Admin/RealmSettingsEndpoints.cs b/src/dotnet/Modgud.Authentication/Api/Admin/RealmSettingsEndpoints.cs index da281f18..69c2933d 100644 --- a/src/dotnet/Modgud.Authentication/Api/Admin/RealmSettingsEndpoints.cs +++ b/src/dotnet/Modgud.Authentication/Api/Admin/RealmSettingsEndpoints.cs @@ -1,8 +1,8 @@ using System.Security.Claims; -using Microsoft.Extensions.Logging; using Modgud.Application.DTOs.RealmSettings; using Modgud.Authentication.RealmSettings; using Modgud.Authorization.AspNetCore; +using Modgud.Infrastructure.Audit; using Modgud.Infrastructure.Persistence.Tenancy; using Modgud.Infrastructure.Realms; @@ -74,19 +74,24 @@ public static WebApplication MapRealmSettingsEndpoints(this WebApplication app, group.MapPost("rotate-signing-key", async ( IRealmKeyStore keyStore, ClaimsPrincipal user, - ILoggerFactory loggerFactory, + ISecurityAuditLog securityAudit, CancellationToken ct) => { var slug = TenantContext.Current; var creds = await keyStore.RotateAsync(slug, ct); var kid = creds.Key.KeyId; - // "Auth:"-prefixed → captured by the AuthLogSink into the admin - // Auth-Log. UserName is surfaced as its own audit column. - loggerFactory.CreateLogger("Modgud.Authentication.Api.Admin.RealmSettings") - .LogWarning( - "Auth: signing key rotated for realm {Realm} by {UserName} — new kid {Kid}", - slug, user.Identity?.Name ?? "(unknown)", kid); + var userName = user.Identity?.Name ?? "(unknown)"; + // Request context — leave Realm unset (ambient TenantContext is correct). 
+ securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.SigningKeyRotated, + Level = "Warning", + Actor = userName, + Status = "rotated", + Reason = $"kid {kid}", + Message = $"signing key rotated by {userName} — new kid {kid}", + }); return Results.Ok(new RotateSigningKeyResponseDto(kid)); }) diff --git a/src/dotnet/Modgud.Authentication/Api/Admin/RecoveryCli.cs b/src/dotnet/Modgud.Authentication/Api/Admin/RecoveryCli.cs index 95a83920..3ce7d9a1 100644 --- a/src/dotnet/Modgud.Authentication/Api/Admin/RecoveryCli.cs +++ b/src/dotnet/Modgud.Authentication/Api/Admin/RecoveryCli.cs @@ -10,6 +10,7 @@ using Modgud.Domain.OAuth.Common; using Modgud.Domain.Realms; using Modgud.Permissions; +using Modgud.Infrastructure.Audit; using Modgud.Infrastructure.Persistence.Tenancy; using Modgud.Infrastructure.Realms; using Marten; @@ -33,8 +34,9 @@ namespace Modgud.Authentication.Api.Admin; /// /// Requires shell access to the host — anyone who can docker exec already has /// DB access, so this doesn't open a new privilege-escalation path. Every invocation -/// is written to the standard auth log ("Auth:" prefix) with a Recovery: -/// subprefix so admins can audit usage after the fact. +/// emits an ops.recovery_cli_invoked record to the streamless security/ops +/// store (flushed synchronously before the process exits, since this path never +/// starts the host) so admins can audit usage after the fact. 
/// public static class RecoveryCli { @@ -58,12 +60,13 @@ public static async Task RunAsync(IServiceProvider services, string[] args, var session = scope.ServiceProvider.GetRequiredService(); var userManager = scope.ServiceProvider.GetRequiredService>(); var permissions = scope.ServiceProvider.GetRequiredService(); + var securityAudit = scope.ServiceProvider.GetRequiredService(); return command switch { "list" => await ListUsersAsync(session, permissions), - "reset-2fa" => await Reset2FaAsync(session, userManager, args), - "set-email" => await SetEmailAsync(session, userManager, args), + "reset-2fa" => await Reset2FaAsync(session, userManager, args, securityAudit, realmSlug), + "set-email" => await SetEmailAsync(session, userManager, args, securityAudit, realmSlug), "magic-link" => await MagicLinkAsync(session, scope.ServiceProvider, args, conf, env), "rebuild-projections" => await RebuildProjectionsAsync(scope.ServiceProvider, realmSlug), "bootstrap-admin" => await BootstrapAdminAsync(scope.ServiceProvider, args, realmSlug), @@ -193,7 +196,9 @@ private static async Task ListUsersAsync(IDocumentSession session, IPermiss private static async Task Reset2FaAsync( IDocumentSession session, UserManager userManager, - string[] args) + string[] args, + ISecurityAuditLog securityAudit, + string realmSlug) { if (args.Length < 2) return Error("Usage: recover reset-2fa "); var userName = args[1].Trim().ToLowerInvariant(); @@ -235,9 +240,16 @@ private static async Task Reset2FaAsync( await session.SaveChangesAsync(); - Serilog.Log.Warning( - "Auth: Recovery reset-2fa. 
User={UserName} TOTP={WasTotp} EmailOtp={WasEmailOtp} PasskeysDeleted={Passkeys}", - user.UserName, wasTotpEnabled, wasEmailOtpEnabled, passkeys.Count); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.RecoveryCliInvoked, + Level = "Warning", + Realm = realmSlug, + Actor = user.Id.ToString(), + Status = "succeeded", + Reason = $"reset-2fa: UserId={user.Id} TOTP={wasTotpEnabled} EmailOtp={wasEmailOtpEnabled} PasskeysDeleted={passkeys.Count}", + Message = $"Recovery reset-2fa. UserId={user.Id} TOTP={wasTotpEnabled} EmailOtp={wasEmailOtpEnabled} PasskeysDeleted={passkeys.Count}", + }); Console.WriteLine($"✓ 2FA reset for {user.UserName}:"); Console.WriteLine($" TOTP disabled: {(wasTotpEnabled ? "yes" : "was already off")}"); @@ -252,7 +264,9 @@ private static async Task Reset2FaAsync( private static async Task SetEmailAsync( IDocumentSession session, UserManager userManager, - string[] args) + string[] args, + ISecurityAuditLog securityAudit, + string realmSlug) { if (args.Length < 3) return Error("Usage: recover set-email "); var userName = args[1].Trim().ToLowerInvariant(); @@ -294,8 +308,16 @@ private static async Task SetEmailAsync( await session.SaveChangesAsync(); - Serilog.Log.Warning("Auth: Recovery set-email. User={UserName} Old={Old} New={New}", - user.UserName, oldEmail, newEmail); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.RecoveryCliInvoked, + Level = "Warning", + Realm = realmSlug, + Actor = user.Id.ToString(), + Status = "succeeded", + Reason = $"set-email: UserId={user.Id} Old={LogPiiMasking.MaskEmail(oldEmail)} New={LogPiiMasking.MaskEmail(newEmail)}", + Message = $"Recovery set-email. UserId={user.Id} Old={LogPiiMasking.MaskEmail(oldEmail)} New={LogPiiMasking.MaskEmail(newEmail)}", + }); Console.WriteLine($"✓ Email updated for {user.UserName}:"); Console.WriteLine($" Old: {oldEmail ?? 
"(none)"}"); @@ -346,8 +368,15 @@ private static async Task MagicLinkAsync( var appUrl = (conf.PublicUrl ?? (env.IsDevelopment() ? "http://localhost:4300" : conf.AppUrl)).TrimEnd('/'); var url = $"{appUrl}/magic-login?userId={user.Id}&token={Uri.EscapeDataString(token)}"; - Serilog.Log.Warning("Auth: Recovery magic-link generated. User={UserName} ExpiresAt={ExpiresAt}", - user.UserName, challenge.ExpiresAt); + scopedServices.GetRequiredService().Record(new SecurityAuditRecord + { + EventType = AuditEvents.RecoveryCliInvoked, + Level = "Warning", + Actor = user.Id.ToString(), + Status = "succeeded", + Reason = $"magic-link: UserId={user.Id} ExpiresAt={challenge.ExpiresAt:O}", + Message = $"Recovery magic-link generated. UserId={user.Id} ExpiresAt={challenge.ExpiresAt:O}", + }); Console.WriteLine($"✓ Magic link for {user.UserName} (expires in {expirationMinutes} min):"); Console.WriteLine(); @@ -371,7 +400,16 @@ private static async Task RebuildProjectionsAsync(IServiceProvider services var timeout = TimeSpan.FromMinutes(10); Console.WriteLine("Rebuilding Marten projections..."); - Serilog.Log.Warning("Auth: Recovery rebuild-projections initiated"); + var securityAudit = services.GetRequiredService(); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.RecoveryCliInvoked, + Level = "Warning", + Realm = tenantId, + Status = "initiated", + Reason = "rebuild-projections", + Message = "Recovery rebuild-projections initiated", + }); // MasterTableTenancy disables Marten's default tenant, so the no-arg // overload throws DefaultTenantUsageDisabledException — build the daemon @@ -386,7 +424,15 @@ private static async Task RebuildProjectionsAsync(IServiceProvider services await daemon.RebuildProjectionAsync(timeout, CancellationToken.None); Console.WriteLine(" OK PermissionRoleProjection (mt_doc_permissionrole)"); - Serilog.Log.Warning("Auth: Recovery rebuild-projections completed"); + securityAudit.Record(new SecurityAuditRecord + { + EventType = 
AuditEvents.RecoveryCliInvoked, + Level = "Warning", + Realm = tenantId, + Status = "succeeded", + Reason = "rebuild-projections", + Message = "Recovery rebuild-projections completed", + }); return 0; } @@ -441,18 +487,33 @@ private static async Task BootstrapAdminAsync( var bootstrapper = scopedServices.GetRequiredService(); var result = await bootstrapper.BootstrapDirectAsync(userName, password, email, firstname, lastname); + var securityAudit = scopedServices.GetRequiredService(); if (result.IsError) { - Serilog.Log.Warning( - "Auth: Recovery bootstrap-admin failed. Realm={Realm} UserName={UserName} Code={Code} Detail={Detail}", - realmSlug, userName, result.FirstError.Code, result.FirstError.Description); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.RecoveryCliInvoked, + Level = "Warning", + Realm = realmSlug, + Actor = LogPiiMasking.MaskUsername(userName), + Status = "failed", + Reason = $"bootstrap-admin: UserName={LogPiiMasking.MaskUsername(userName)} Code={result.FirstError.Code} Detail={result.FirstError.Description}", + Message = $"Recovery bootstrap-admin failed. Realm={realmSlug} UserName={LogPiiMasking.MaskUsername(userName)} Code={result.FirstError.Code} Detail={result.FirstError.Description}", + }); return Error($"{result.FirstError.Code}: {result.FirstError.Description}"); } var admin = result.Value; - Serilog.Log.Warning( - "Auth: Recovery bootstrap-admin succeeded. Realm={Realm} UserName={UserName} Mode=Direct", - realmSlug, admin.UserName); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.RecoveryCliInvoked, + Level = "Warning", + Realm = realmSlug, + Actor = admin.UserId.ToString(), + Status = "succeeded", + Reason = $"bootstrap-admin: UserId={admin.UserId} Mode=Direct", + Message = $"Recovery bootstrap-admin succeeded. 
Realm={realmSlug} UserId={admin.UserId} Mode=Direct", + }); Console.WriteLine($"✓ Admin created in realm '{realmSlug}':"); Console.WriteLine($" UserName: {admin.UserName}"); @@ -489,9 +550,16 @@ private static async Task BootstrapAdminInviteAsync( issuedBy: null, // CLI invocation — no authenticated CP-admin realm); - Serilog.Log.Warning( - "Auth: Recovery bootstrap-admin issued invite. Realm={Realm} UserName={UserName} Email={Email} ExpiresAt={ExpiresAt}", - realmSlug, userName, email, invite.ExpiresAt); + scopedServices.GetRequiredService().Record(new SecurityAuditRecord + { + EventType = AuditEvents.RecoveryCliInvoked, + Level = "Warning", + Realm = realmSlug, + Actor = LogPiiMasking.MaskUsername(userName), + Status = "initiated", + Reason = $"bootstrap-admin invite: UserName={LogPiiMasking.MaskUsername(userName)} Email={LogPiiMasking.MaskEmail(email)} ExpiresAt={invite.ExpiresAt:O}", + Message = $"Recovery bootstrap-admin issued invite. Realm={realmSlug} UserName={LogPiiMasking.MaskUsername(userName)} Email={LogPiiMasking.MaskEmail(email)} ExpiresAt={invite.ExpiresAt:O}", + }); Console.WriteLine($"✓ Bootstrap-invite issued for realm '{realmSlug}':"); Console.WriteLine($" UserName: {invite.UserName}"); @@ -643,9 +711,15 @@ private static async Task MigrateClientCredentialsAsync( await session.SaveChangesAsync(); - Serilog.Log.Warning( - "Auth: Recovery migrate-cc-credentials completed. Realm={Realm} Migrated={Migrated} SaCreated={SaCreated} SaReused={SaReused}", - realmSlug, migrated, saCreated, saReused); + scopedServices.GetRequiredService().Record(new SecurityAuditRecord + { + EventType = AuditEvents.RecoveryCliInvoked, + Level = "Warning", + Realm = realmSlug, + Status = "succeeded", + Reason = $"migrate-cc-credentials: Migrated={migrated} SaCreated={saCreated} SaReused={saReused}", + Message = $"Recovery migrate-cc-credentials completed. 
Realm={realmSlug} Migrated={migrated} SaCreated={saCreated} SaReused={saReused}", + }); Console.WriteLine(); Console.WriteLine($"✓ Done. Migrated={migrated} ServiceAccounts created={saCreated} re-used={saReused}"); @@ -719,7 +793,15 @@ private static async Task RealmAddDomainAsync(IServiceProvider services, st Console.WriteLine($"✓ Added '{domain}' to realm '{slug}'. Now: [{string.Join(", ", realm.Domains)}]"); PrintRestartHint(); - Serilog.Log.Warning("Auth: Recovery realm-add-domain — Realm={Slug} Domain={Domain}", slug, domain); + services.GetRequiredService().Record(new SecurityAuditRecord + { + EventType = AuditEvents.RecoveryCliInvoked, + Level = "Warning", + Realm = slug, + Status = "succeeded", + Reason = $"realm-add-domain: Realm={slug} Domain={domain}", + Message = $"Recovery realm-add-domain — Realm={slug} Domain={domain}", + }); return 0; } @@ -751,7 +833,15 @@ private static async Task RealmRemoveDomainAsync(IServiceProvider services, Console.WriteLine($"✓ Removed '{domain}' from realm '{slug}'. Now: [{string.Join(", ", remaining)}]"); PrintRestartHint(); - Serilog.Log.Warning("Auth: Recovery realm-remove-domain — Realm={Slug} Domain={Domain}", slug, domain); + services.GetRequiredService().Record(new SecurityAuditRecord + { + EventType = AuditEvents.RecoveryCliInvoked, + Level = "Warning", + Realm = slug, + Status = "succeeded", + Reason = $"realm-remove-domain: Realm={slug} Domain={domain}", + Message = $"Recovery realm-remove-domain — Realm={slug} Domain={domain}", + }); return 0; } @@ -793,7 +883,15 @@ private static async Task ControlPlaneAsync(IServiceProvider services, stri if (result.IsError) return Error($"{result.FirstError.Code}: {result.FirstError.Description}"); - Serilog.Log.Warning("Auth: Recovery control-plane transfer. 
Target={Slug}", targetSlug); + services.GetRequiredService().Record(new SecurityAuditRecord + { + EventType = AuditEvents.RecoveryCliInvoked, + Level = "Warning", + Realm = targetSlug, + Status = "succeeded", + Reason = $"control-plane transfer: Target={targetSlug}", + Message = $"Recovery control-plane transfer. Target={targetSlug}", + }); Console.WriteLine($"✓ Control plane transferred to realm '{targetSlug}'."); PrintRestartHint(); return 0; @@ -826,7 +924,15 @@ private static async Task AdoptTenantAsync(IServiceProvider services, strin if (result.IsError) return Error($"{result.FirstError.Code}: {result.FirstError.Description}"); - Serilog.Log.Warning("Auth: Recovery adopt-tenant. Slug={Slug}", slug); + services.GetRequiredService().Record(new SecurityAuditRecord + { + EventType = AuditEvents.RecoveryCliInvoked, + Level = "Warning", + Realm = slug, + Status = "succeeded", + Reason = $"adopt-tenant: Slug={slug}", + Message = $"Recovery adopt-tenant. Slug={slug}", + }); Console.WriteLine($"✓ Adopted existing database as realm '{slug}'."); Console.WriteLine($" Domains: {string.Join(", ", result.Value.Domains)}"); PrintRestartHint(); @@ -860,7 +966,15 @@ private static async Task RotateSigningKeyAsync(IServiceProvider services, var creds = await keyStore.RotateAsync(tenantId); var kid = creds.Key.KeyId; - Serilog.Log.Warning("Auth: Recovery rotate-signing-key. Realm={Realm} NewKid={Kid}", tenantId, kid); + services.GetRequiredService().Record(new SecurityAuditRecord + { + EventType = AuditEvents.RecoveryCliInvoked, + Level = "Warning", + Realm = tenantId, + Status = "rotated", + Reason = $"rotate-signing-key: Realm={tenantId} NewKid={kid}", + Message = $"Recovery rotate-signing-key. 
Realm={tenantId} NewKid={kid}", + }); Console.WriteLine($" OK new active kid: {kid}"); Console.WriteLine(" Previous key retired into the 30-day verification overlap window."); // The CLI is a separate process — it only mutates its OWN in-memory key diff --git a/src/dotnet/Modgud.Authentication/Api/ExternalAuth/DynamicOidcSchemeManager.cs b/src/dotnet/Modgud.Authentication/Api/ExternalAuth/DynamicOidcSchemeManager.cs index 0dd49908..f6638696 100644 --- a/src/dotnet/Modgud.Authentication/Api/ExternalAuth/DynamicOidcSchemeManager.cs +++ b/src/dotnet/Modgud.Authentication/Api/ExternalAuth/DynamicOidcSchemeManager.cs @@ -70,7 +70,7 @@ public async Task RegisterAsync(LoginProvider config) if (config.Type != LoginProviderType.Oidc) { logger.LogInformation( - "Auth: skipping non-Oidc LoginProvider {Id} of type {Type}", + "skipping non-Oidc LoginProvider {Id} of type {Type}", config.Id, config.Type); return; } @@ -203,7 +203,7 @@ public async Task RegisterAsync(LoginProvider config) OnTokenValidated = ctx => { logger.LogInformation( - "Auth: External OIDC token validated for scheme {Scheme} — sub={Subject}, iss={Issuer}", + "External OIDC token validated for scheme {Scheme} — sub={Subject}, iss={Issuer}", ctx.Scheme.Name, ctx.Principal?.FindFirst("sub")?.Value, ctx.Principal?.FindFirst("iss")?.Value); @@ -212,7 +212,7 @@ public async Task RegisterAsync(LoginProvider config) OnRemoteFailure = ctx => { logger.LogWarning(ctx.Failure, - "Auth: OIDC remote failure for scheme {Scheme}: {Error}", + "OIDC remote failure for scheme {Scheme}: {Error}", ctx.Scheme.Name, ctx.Failure?.Message ?? "(no detail)"); ctx.HandleResponse(); var detail = ctx.Failure is null ? 
"oidc" @@ -244,7 +244,7 @@ public async Task RegisterAsync(LoginProvider config) handlerType: typeof(HostAwareOpenIdConnectHandler)); schemeProvider.AddScheme(scheme); - logger.LogInformation("Auth: Registered OIDC scheme {Scheme} (LoginProvider {Display} / {Flavor}) in realm {Realm}", + logger.LogInformation("Registered OIDC scheme {Scheme} (LoginProvider {Display} / {Flavor}) in realm {Realm}", schemeName, config.DisplayName, config.Flavor, realmSlug); } @@ -254,7 +254,7 @@ public async Task UnregisterAsync(Guid loginProviderId) schemeProvider.RemoveScheme(schemeName); oidcOptionsCache.TryRemove(schemeName); realmRegistry.Remove(schemeName); - logger.LogInformation("Auth: Unregistered OIDC scheme {Scheme}", schemeName); + logger.LogInformation("Unregistered OIDC scheme {Scheme}", schemeName); await Task.CompletedTask; } diff --git a/src/dotnet/Modgud.Authentication/Api/ExternalAuth/ExternalLoginProcessor.cs b/src/dotnet/Modgud.Authentication/Api/ExternalAuth/ExternalLoginProcessor.cs index 199663db..154ffd40 100644 --- a/src/dotnet/Modgud.Authentication/Api/ExternalAuth/ExternalLoginProcessor.cs +++ b/src/dotnet/Modgud.Authentication/Api/ExternalAuth/ExternalLoginProcessor.cs @@ -13,6 +13,7 @@ using Modgud.Authorization.Services; using Modgud.Domain.Users.Events; using Modgud.Authentication.Identity.ExternalAuth; +using Modgud.Infrastructure.Audit; using Modgud.Permissions.Abstractions; @@ -38,6 +39,7 @@ public class ExternalLoginProcessor( UserUpdateScriptRunner scriptRunner, ILoginTimeMembershipDeriver membershipDeriver, ILogger logger, + ISecurityAuditLog securityAudit, TimeProvider clock) { public async Task ProcessAsync( @@ -59,9 +61,14 @@ public async Task ProcessAsync( if (config.Type != LoginProviderType.Oidc && config.Type != LoginProviderType.Saml) { var err = LoginProviderErrors.TypeNotSupported(config.Type); - logger.LogWarning( - "Auth: External login rejected — LoginProvider {Id} has type {Type}, expected Oidc or Saml", - loginProviderId, 
config.Type); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.ExternalLoginRejected, + Level = "Warning", + Status = "rejected", + Reason = $"misconfigured provider type {config.Type} (LoginProvider {loginProviderId})", + Message = "External login rejected — provider misconfigured (expected Oidc or Saml)", + }); return ExternalLoginResult.Failed(err.Code, err.Description); } @@ -74,7 +81,14 @@ public async Task ProcessAsync( if (string.IsNullOrWhiteSpace(issuer) || string.IsNullOrWhiteSpace(subject)) { - logger.LogWarning("Auth: External login missing iss/sub (config {Id})", loginProviderId); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.ExternalLoginRejected, + Level = "Warning", + Status = "rejected", + Reason = "missing iss/sub", + Message = "External login rejected — identity provider returned no iss/sub", + }); return ExternalLoginResult.Failed("Idp.InvalidToken", "The identity provider did not return a subject."); } @@ -83,7 +97,7 @@ public async Task ProcessAsync( if (!scriptResult.Succeeded) { logger.LogWarning( - "Auth: UserUpdateScript failed for LoginProvider {Id} subject {Sub} — {Error}; continuing without property updates", + "UserUpdateScript failed for LoginProvider {Id} subject {Sub} — {Error}; continuing without property updates", loginProviderId, subject, scriptResult.Error); } @@ -116,7 +130,7 @@ public async Task ProcessAsync( if (linkedUser is null || link.IsUnlinked) { logger.LogInformation( - "Auth: External identity {State} link {LinkId} forgotten (was user {UserId}) — re-matching by policy", + "External identity {State} link {LinkId} forgotten (was user {UserId}) — re-matching by policy", link.IsUnlinked ? "unlinked" : "stale", link.Id, link.UserId); session.Events.Append(link.Id, new ExternalIdentityUnlinkedEvent(link.Id, capturedAt, link.UserId)); @@ -131,9 +145,14 @@ public async Task ProcessAsync( // live one is not.) 
if (authenticatedUserId is { } authId && authId != link.UserId) { - logger.LogWarning( - "Auth: Link-attempt rejected — external subject already linked to different user (authUser={AuthId}, linkUser={LinkId})", - authId, link.UserId); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.IdentityHijackBlocked, + Level = "Warning", + Status = "rejected", + Reason = "external subject already linked to a different user", + Message = "Link attempt rejected — external subject already linked to a different user", + }); return ExternalLoginResult.Failed("Idp.LinkedToOtherUser", "This identity is already linked to another Modgud account."); } @@ -150,7 +169,7 @@ public async Task ProcessAsync( } await RecordScriptRunAsync(link, config, scriptResult, rawClaims, capturedAt, ct); - logger.LogInformation("Auth: External login (returning) user {UserId} via IdP {IdpId}", linkedUser.Id, loginProviderId); + logger.LogInformation("External login (returning) user {UserId} via IdP {IdpId}", linkedUser.Id, loginProviderId); return await Success(linkedUser, link, externalPrincipal, loginProviderId, issuer, config, externalGroups, ct); } } @@ -177,7 +196,7 @@ public async Task ProcessAsync( existing.Id, loginProviderId, issuer, subject, scriptResult, rawClaims, config.StoreRawClaims, config.Slug, isCreator: false, capturedAt, ct); logger.LogInformation( - "Auth: External identity linked to existing user {UserId} via IdP {IdpId}", + "External identity linked to existing user {UserId} via IdP {IdpId}", existing.Id, loginProviderId); return await Success(existing, addedLink, externalPrincipal, loginProviderId, issuer, config, externalGroups, ct); } @@ -186,9 +205,16 @@ public async Task ProcessAsync( var email = scriptResult.Email.Presence == FieldPresence.Value ? 
scriptResult.Email.Value : null; if (!IsEmailAllowed(config, email)) { - logger.LogWarning( - "Auth: External login rejected — email '{MaskedEmail}' not in allowlist for IdP {IdpId}", - LogPiiMasking.MaskEmail(email), loginProviderId); + var maskedEmail = LogPiiMasking.MaskEmail(email); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.ExternalLoginRejected, + Level = "Warning", + Actor = maskedEmail, + Status = "rejected", + Reason = "email not in allowlist", + Message = $"External login rejected — email '{maskedEmail}' not in allowlist", + }); return ExternalLoginResult.Failed("Idp.EmailNotAllowed", "Your email domain is not allowed for this provider."); } @@ -216,7 +242,7 @@ public async Task ProcessAsync( var newLink = await CreateLinkAsync(user.Id, loginProviderId, issuer, subject, scriptResult, rawClaims, config.StoreRawClaims, config.Slug, isCreator: false, capturedAt, ct); logger.LogInformation( - "Auth: External login (email-linked) user {UserId} via IdP {IdpId}", user.Id, loginProviderId); + "External login (email-linked) user {UserId} via IdP {IdpId}", user.Id, loginProviderId); return await Success(user, newLink, externalPrincipal, loginProviderId, issuer, config, externalGroups, ct); } } @@ -225,8 +251,14 @@ public async Task ProcessAsync( // 4. 
JIT user creation if (!config.AutoCreateUsers) { - logger.LogWarning( - "Auth: External login rejected — no existing link, AutoCreateUsers=false for IdP {IdpId}", loginProviderId); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.ExternalLoginRejected, + Level = "Warning", + Status = "rejected", + Reason = "no existing link and JIT creation disabled", + Message = "External login rejected — no existing link and automatic user creation is disabled", + }); return ExternalLoginResult.Failed("Idp.NoUserAndAutoCreateOff", "No user is linked to this identity and automatic creation is disabled."); } @@ -242,9 +274,16 @@ public async Task ProcessAsync( .AnyAsync(ct); if (emailTaken) { - logger.LogWarning( - "Auth: JIT creation rejected — email '{MaskedEmail}' is already taken by another user (IdP {IdpId})", - LogPiiMasking.MaskEmail(email), loginProviderId); + var maskedEmail = LogPiiMasking.MaskEmail(email); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.JitEmailConflict, + Level = "Warning", + Actor = maskedEmail, + Status = "rejected", + Reason = "email already taken (JIT create)", + Message = $"JIT creation rejected — email '{maskedEmail}' is already taken by another user", + }); return ExternalLoginResult.Failed("Idp.EmailConflict", "A Modgud account with this email already exists. Please contact your administrator."); } @@ -256,7 +295,7 @@ public async Task ProcessAsync( // This provider created the user — mark the link as the creator so it // stays profile-authoritative by default (decision A). 
var jitLink = await CreateLinkAsync(created.Id, loginProviderId, issuer, subject, scriptResult, rawClaims, config.StoreRawClaims, config.Slug, isCreator: true, capturedAt, ct); - logger.LogInformation("Auth: External login (JIT-created) user {UserId} via IdP {IdpId}", created.Id, loginProviderId); + logger.LogInformation("External login (JIT-created) user {UserId} via IdP {IdpId}", created.Id, loginProviderId); return await Success(created, jitLink, externalPrincipal, loginProviderId, issuer, config, externalGroups, ct); } @@ -294,9 +333,16 @@ public async Task ProcessAsync( .FirstOrDefaultAsync(ct); if (clashingUserId != Guid.Empty) { - logger.LogWarning( - "Auth: UserUpdateScript email conflict — '{MaskedEmail}' is already taken by user {OtherId}; login rejected for user {UserId}", - LogPiiMasking.MaskEmail(newEmail), clashingUserId, user.Id); + var maskedEmail = LogPiiMasking.MaskEmail(newEmail); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.JitEmailConflict, + Level = "Warning", + Actor = maskedEmail, + Status = "rejected", + Reason = "email already taken (user-update script)", + Message = $"UserUpdateScript email conflict — '{maskedEmail}' is already taken by another user; login rejected", + }); return new ApplyUpdatesError( "Idp.EmailConflict", "The identity provider reports an email that is already used by another Modgud account."); @@ -336,7 +382,7 @@ public async Task ProcessAsync( var result = await userManager.UpdateAsync(user); if (!result.Succeeded) { - logger.LogError("Auth: UserUpdateScript property update failed — {Errors}", + logger.LogError("UserUpdateScript property update failed — {Errors}", string.Join("; ", result.Errors.Select(e => $"{e.Code}:{e.Description}"))); return new ApplyUpdatesError( "Idp.UserUpdateFailed", @@ -374,8 +420,15 @@ private async Task Success( // IsActive=true, so the JIT path passes this gate unaffected. 
if (user.IsDeleted || !user.IsActive) { - logger.LogWarning( - "Auth: External login rejected — user {UserId} is inactive or deleted", user.Id); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.ExternalLoginRejected, + Level = "Warning", + Actor = user.Id.ToString(), + Status = "rejected", + Reason = "user inactive or deleted", + Message = $"External login rejected — user {user.Id} is inactive or deleted", + }); return ExternalLoginResult.Failed("Idp.UserInactive", "This account is not active."); } @@ -412,8 +465,23 @@ private async Task Success( identity.AddClaim(new Claim(FederationClaimTypes.SessionGroup, groupId.ToString())); if (derived.MatchedGroupIds.Count > 0) logger.LogInformation( - "Auth: external-derived grant — user {UserId} via IdP {IdpId} ({Slug}) matched {Count} session group(s)", + "external-derived grant — user {UserId} via IdP {IdpId} ({Slug}) matched {Count} session group(s)", user.Id, loginProviderId, config.Slug, derived.MatchedGroupIds.Count); + + // The deriver defensively drops any externally-derived group that would + // confer realm:admin (the config guard should make this impossible). It + // lives in the Authorization layer, which cannot reach the audit store, + // so it surfaces the count and we record the security event here. 
+ if (derived.DroppedRealmAdminCount > 0) + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.PrivilegeEscalationBlocked, + Level = "Warning", + Actor = user.Id.ToString(), + Status = "blocked", + Reason = $"dropped {derived.DroppedRealmAdminCount} externally-derived group(s) conferring realm:admin via provider {config.Slug}", + Message = $"Blocked {derived.DroppedRealmAdminCount} externally-derived realm:admin group(s) for user {user.Id}", + }); } return new ExternalLoginResult( @@ -501,7 +569,8 @@ private async Task CreateLinkAsync( Issuer: issuer, LinkedAt: capturedAt)); - session.Events.Append(userId, new UserLoggedInEvent(userId, IpAddress: null)); + session.Events.Append(userId, new UserLoggedInEvent(userId, IpAddress: null, + Method: Modgud.Infrastructure.Observability.ModgudMeters.LoginMethod.External)); // Federation v1: refresh this provider's claims snapshot in the same // transaction as the link write. @@ -534,7 +603,8 @@ private async Task RecordScriptRunAsync( Email: email, DisplayName: displayName)); - session.Events.Append(link.UserId, new UserLoggedInEvent(link.UserId, IpAddress: null)); + session.Events.Append(link.UserId, new UserLoggedInEvent(link.UserId, IpAddress: null, + Method: Modgud.Infrastructure.Observability.ModgudMeters.LoginMethod.External)); // Federation v1: refresh this provider's claims snapshot in the same // transaction as the login write. 
@@ -571,7 +641,7 @@ private async Task RecordScriptRunAsync( var result = await userManager.CreateAsync(user); if (!result.Succeeded) { - logger.LogError("Auth: JIT user creation failed — {Errors}", + logger.LogError("JIT user creation failed — {Errors}", string.Join("; ", result.Errors.Select(e => $"{e.Code}:{e.Description}"))); return null; } diff --git a/src/dotnet/Modgud.Authentication/Api/ExternalAuth/OidcSchemeBootstrap.cs b/src/dotnet/Modgud.Authentication/Api/ExternalAuth/OidcSchemeBootstrap.cs index b3f2682b..e43f0de8 100644 --- a/src/dotnet/Modgud.Authentication/Api/ExternalAuth/OidcSchemeBootstrap.cs +++ b/src/dotnet/Modgud.Authentication/Api/ExternalAuth/OidcSchemeBootstrap.cs @@ -63,16 +63,16 @@ public async Task StartAsync(CancellationToken cancellationToken) try { await manager.RegisterAsync(config); } catch (Exception ex) { - logger.LogError(ex, "Auth: Bootstrap registration failed for LoginProvider {Id} in realm {Realm}", config.Id, realm.Slug); + logger.LogError(ex, "Bootstrap registration failed for LoginProvider {Id} in realm {Realm}", config.Id, realm.Slug); } } totalRegistered += enabled.Count; if (enabled.Count > 0) - logger.LogDebug("Auth: OidcSchemeBootstrap registered {Count} schemes in realm {Realm}", enabled.Count, realm.Slug); + logger.LogDebug("OidcSchemeBootstrap registered {Count} schemes in realm {Realm}", enabled.Count, realm.Slug); } - logger.LogInformation("Auth: OidcSchemeBootstrap registered {Count} external auth schemes across {Realms} realm(s)", + logger.LogInformation("OidcSchemeBootstrap registered {Count} external auth schemes across {Realms} realm(s)", totalRegistered, realms.Count); } diff --git a/src/dotnet/Modgud.Authentication/Api/ExternalAuth/ProfileLinkEndpoints.cs b/src/dotnet/Modgud.Authentication/Api/ExternalAuth/ProfileLinkEndpoints.cs index 881ae46e..d9d52ba5 100644 --- a/src/dotnet/Modgud.Authentication/Api/ExternalAuth/ProfileLinkEndpoints.cs +++ 
b/src/dotnet/Modgud.Authentication/Api/ExternalAuth/ProfileLinkEndpoints.cs @@ -186,8 +186,8 @@ private static async Task UnlinkAsync( await writeSession.SaveChangesAsync(ct); logger.LogInformation( - "Auth: External identity disconnected{AdminTag} — {UserName} unlinked provider {ProviderId} (link {LinkId})", - isAdmin ? " by admin" : "", user?.UserName, link.LoginProviderId, link.Id); + "External identity disconnected{AdminTag} — {UserId} unlinked provider {ProviderId} (link {LinkId})", + isAdmin ? " by admin" : "", user?.Id, link.LoginProviderId, link.Id); return Results.NoContent(); } diff --git a/src/dotnet/Modgud.Authentication/Api/ExternalAuth/Saml/DynamicSamlSchemeManager.cs b/src/dotnet/Modgud.Authentication/Api/ExternalAuth/Saml/DynamicSamlSchemeManager.cs index 5886e8f8..07fb67e9 100644 --- a/src/dotnet/Modgud.Authentication/Api/ExternalAuth/Saml/DynamicSamlSchemeManager.cs +++ b/src/dotnet/Modgud.Authentication/Api/ExternalAuth/Saml/DynamicSamlSchemeManager.cs @@ -1,6 +1,7 @@ using System.Collections.Concurrent; using Modgud.Authentication.Domain.LoginProviders; using Modgud.Authentication.Identity.LoginProviders.Saml; +using Modgud.Infrastructure.Audit; using Modgud.Infrastructure.Persistence.Tenancy; namespace Modgud.Authentication.Api.ExternalAuth.Saml; @@ -26,6 +27,7 @@ public class DynamicSamlSchemeManager( SamlFlavorRegistry flavors, SamlMetadataFetcher metadataFetcher, TimeProvider clock, + ISecurityAuditLog securityAudit, ILogger logger) { private readonly ConcurrentDictionary _cache = new(); @@ -57,7 +59,7 @@ public async Task RegisterAsync(LoginProvider config) // because being called with the wrong type is a code bug we want // to notice during development without crying wolf in production. 
logger.LogDebug( - "Auth: SAML manager called for non-SAML LoginProvider {Id} (type={Type}) — ignored", + "SAML manager called for non-SAML LoginProvider {Id} (type={Type}) — ignored", config.Id, config.Type); return; } @@ -120,14 +122,14 @@ public async Task RegisterAsync(LoginProvider config) if (idpMetadata is null) { logger.LogWarning( - "Auth: Registered SAML provider {Id} ({Display}) in realm {Realm} WITHOUT IdP metadata — " + + "Registered SAML provider {Id} ({Display}) in realm {Realm} WITHOUT IdP metadata — " + "login attempts will fail until metadata is reachable", config.Id, config.DisplayName, realmSlug); } else { logger.LogInformation( - "Auth: Registered SAML provider {Id} ({Display} / {Flavor}) in realm {Realm} " + + "Registered SAML provider {Id} ({Display} / {Flavor}) in realm {Realm} " + "with IdP {IdpEntity} and {CertCount} signing cert(s)", config.Id, config.DisplayName, config.Flavor, realmSlug, idpMetadata.EntityId, idpMetadata.SigningCertificatesBase64.Count); @@ -140,7 +142,7 @@ public Task UnregisterAsync(Guid loginProviderId) if (_cache.TryRemove(loginProviderId, out var entry)) { logger.LogInformation( - "Auth: Unregistered SAML provider {Id} ({Display})", + "Unregistered SAML provider {Id} ({Display})", loginProviderId, entry.DisplayName); } return Task.CompletedTask; @@ -224,12 +226,17 @@ public async Task RefreshMetadataAsync(Guid loginProviderId, CancellationT if (existing.IdpMetadata is null || !SequenceEqual(existing.IdpMetadata.SigningCertificatesBase64, fresh.SigningCertificatesBase64)) { - logger.LogInformation( - "Auth: SAML metadata refresh for provider {Id} changed signing certs " + - "({OldCount} → {NewCount})", - loginProviderId, - existing.IdpMetadata?.SigningCertificatesBase64.Count ?? 0, - fresh.SigningCertificatesBase64.Count); + var oldCount = existing.IdpMetadata?.SigningCertificatesBase64.Count ?? 
0; + var newCount = fresh.SigningCertificatesBase64.Count; + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.SamlMetadataRefreshed, + Realm = existing.RealmSlug, + Level = "Info", + Status = "cert_changed", + Reason = $"signing certs {oldCount}->{newCount}", + Message = $"SAML metadata refresh for provider {loginProviderId} changed signing certs ({oldCount} -> {newCount})", + }); } return true; diff --git a/src/dotnet/Modgud.Authentication/Api/ExternalAuth/Saml/SamlLoginFlow.cs b/src/dotnet/Modgud.Authentication/Api/ExternalAuth/Saml/SamlLoginFlow.cs index 81149308..04275fee 100644 --- a/src/dotnet/Modgud.Authentication/Api/ExternalAuth/Saml/SamlLoginFlow.cs +++ b/src/dotnet/Modgud.Authentication/Api/ExternalAuth/Saml/SamlLoginFlow.cs @@ -12,6 +12,7 @@ using Modgud.Authentication.Domain; using Modgud.Authentication.Identity.LoginProviders.Saml; using Modgud.Authentication.Sessions; +using Modgud.Infrastructure.Audit; using Modgud.Infrastructure.Observability; namespace Modgud.Authentication.Api.ExternalAuth.Saml; @@ -34,6 +35,7 @@ public class SamlLoginFlow( ExternalLoginProcessor processor, SignInManager signInManager, ISessionService sessionService, + ISecurityAuditLog securityAudit, ILogger logger) { /// @@ -57,18 +59,28 @@ public async Task StartLoginAsync( { if (provider.IdpMetadata is null) { - logger.LogWarning( - "Auth: SAML login refused for provider {Id} — no IdP metadata cached", - provider.LoginProviderId); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.ExternalLoginRejected, + Level = "Warning", + Status = "rejected", + Reason = $"SAML: no IdP metadata cached (provider {provider.LoginProviderId})", + Message = $"SAML login refused for provider {provider.Slug} — no IdP metadata cached", + }); return Results.Redirect("/login?error=saml-no-metadata"); } if (string.IsNullOrEmpty(provider.IdpMetadata.SsoRedirectUrl) && string.IsNullOrEmpty(provider.IdpMetadata.SsoPostUrl)) { - logger.LogWarning( - 
"Auth: SAML login refused for provider {Id} — IdP metadata has no SSO endpoint", - provider.LoginProviderId); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.ExternalLoginRejected, + Level = "Warning", + Status = "rejected", + Reason = $"SAML: IdP metadata has no SSO endpoint (provider {provider.LoginProviderId})", + Message = $"SAML login refused for provider {provider.Slug} — IdP metadata has no SSO endpoint", + }); return Results.Redirect("/login?error=saml-no-sso"); } @@ -96,7 +108,7 @@ public async Task StartLoginAsync( binding.Bind(authnRequest); logger.LogInformation( - "Auth: SAML AuthnRequest built for provider {Id} → IdP {IdpEntity}", + "SAML AuthnRequest built for provider {Id} → IdP {IdpEntity}", provider.LoginProviderId, provider.IdpMetadata.EntityId); return Results.Redirect(binding.RedirectLocation.OriginalString); @@ -116,6 +128,8 @@ public async Task HandleAcsAsync( if (provider.IdpMetadata is null) return Results.Redirect("/login?error=saml-no-metadata"); + var ip = http.Connection.RemoteIpAddress?.ToString(); + Saml2AuthnResponse saml2Response; Saml2PostBinding binding; Saml2RequestContext ctx; @@ -126,8 +140,17 @@ public async Task HandleAcsAsync( catch (Exception ex) { logger.LogWarning(ex, - "Auth: SAML context build failed for provider {Id}", + "SAML context build failed for provider {Id}", provider.LoginProviderId); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.ExternalLoginRejected, + Level = "Warning", + Ip = ip, + Status = "rejected", + Reason = $"SAML: context build failed (provider {provider.LoginProviderId})", + Message = $"SAML login refused for provider {provider.Slug} — context build failed", + }); ModgudMeters.RecordLogin(ModgudMeters.LoginMethod.External, ModgudMeters.LoginOutcome.Failure); return Results.Redirect("/login?error=saml-invalid"); } @@ -145,17 +168,32 @@ public async Task HandleAcsAsync( catch (Exception ex) { logger.LogWarning(ex, - "Auth: SAML response 
read/validate failed for provider {Id}", + "SAML response read/validate failed for provider {Id}", provider.LoginProviderId); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.ExternalLoginRejected, + Level = "Warning", + Ip = ip, + Status = "rejected", + Reason = $"SAML: response read/validate failed (provider {provider.LoginProviderId})", + Message = $"SAML login refused for provider {provider.Slug} — response read/validate failed", + }); ModgudMeters.RecordLogin(ModgudMeters.LoginMethod.External, ModgudMeters.LoginOutcome.Failure); return Results.Redirect("/login?error=saml-invalid"); } if (saml2Response.Status != Saml2StatusCodes.Success) { - logger.LogWarning( - "Auth: SAML response non-success status {Status} for provider {Id}", - saml2Response.Status, provider.LoginProviderId); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.ExternalLoginRejected, + Level = "Warning", + Ip = ip, + Status = "rejected", + Reason = $"SAML: non-success status {saml2Response.Status} (provider {provider.LoginProviderId})", + Message = $"SAML login refused for provider {provider.Slug} — non-success status {saml2Response.Status}", + }); ModgudMeters.RecordLogin(ModgudMeters.LoginMethod.External, ModgudMeters.LoginOutcome.Failure); return Results.Redirect($"/login?error=saml-{Uri.EscapeDataString(saml2Response.Status.ToString() ?? 
"status")}"); } @@ -169,9 +207,15 @@ public async Task HandleAcsAsync( var sigError = CheckRequiredSignatures(saml2Response.XmlDocument, provider.FlavorData); if (sigError is not null) { - logger.LogWarning( - "Auth: SAML response failed required-signature check ({Reason}) for provider {Id}", - sigError, provider.LoginProviderId); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.SamlSignatureRejected, + Level = "Warning", + Ip = ip, + Status = "rejected", + Reason = $"SAML: required-signature check failed ({sigError}) for provider {provider.LoginProviderId}", + Message = $"SAML response failed required-signature check ({sigError}) for provider {provider.Slug}", + }); ModgudMeters.RecordLogin(ModgudMeters.LoginMethod.External, ModgudMeters.LoginOutcome.Failure); return Results.Redirect($"/login?error=saml-{Uri.EscapeDataString(sigError)}"); } diff --git a/src/dotnet/Modgud.Authentication/Api/ExternalAuth/Saml/SamlMetadataRefreshService.cs b/src/dotnet/Modgud.Authentication/Api/ExternalAuth/Saml/SamlMetadataRefreshService.cs index 88212981..b9e9a605 100644 --- a/src/dotnet/Modgud.Authentication/Api/ExternalAuth/Saml/SamlMetadataRefreshService.cs +++ b/src/dotnet/Modgud.Authentication/Api/ExternalAuth/Saml/SamlMetadataRefreshService.cs @@ -1,3 +1,5 @@ +using Modgud.Infrastructure.Audit; + namespace Modgud.Authentication.Api.ExternalAuth.Saml; /// @@ -17,6 +19,7 @@ namespace Modgud.Authentication.Api.ExternalAuth.Saml; public class SamlMetadataRefreshService( DynamicSamlSchemeManager manager, TimeProvider clock, + ISecurityAuditLog securityAudit, ILogger logger) : BackgroundService { /// @@ -44,7 +47,7 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken) catch (Exception ex) { logger.LogError(ex, - "Auth: SAML metadata refresh tick failed unexpectedly — continuing"); + "SAML metadata refresh tick failed unexpectedly — continuing"); } try { await Task.Delay(PollInterval, clock, stoppingToken); } @@ -74,16 +77,22 @@ 
private async Task TickAsync(CancellationToken ct) { failed++; logger.LogWarning(ex, - "Auth: SAML metadata refresh failed for provider {Id}", + "SAML metadata refresh failed for provider {Id}", entry.LoginProviderId); } } if (refreshed > 0 || failed > 0) { - logger.LogInformation( - "Auth: SAML metadata refresh tick — refreshed={Refreshed} failed={Failed} (scanned={Total})", - refreshed, failed, snapshot.Count); + // Platform-wide control-plane tick — leave Realm unset. + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.SamlMetadataRefreshed, + Level = "Info", + Status = "refreshed", + Reason = $"refreshed={refreshed} failed={failed}", + Message = $"SAML metadata refresh tick — refreshed={refreshed} failed={failed} (scanned={snapshot.Count})", + }); } } diff --git a/src/dotnet/Modgud.Authentication/Api/ExternalAuth/Saml/SamlSchemeBootstrap.cs b/src/dotnet/Modgud.Authentication/Api/ExternalAuth/Saml/SamlSchemeBootstrap.cs index 36c4ed7a..86256700 100644 --- a/src/dotnet/Modgud.Authentication/Api/ExternalAuth/Saml/SamlSchemeBootstrap.cs +++ b/src/dotnet/Modgud.Authentication/Api/ExternalAuth/Saml/SamlSchemeBootstrap.cs @@ -45,7 +45,7 @@ public async Task StartAsync(CancellationToken cancellationToken) catch (Exception ex) { logger.LogError(ex, - "Auth: SAML bootstrap registration failed for LoginProvider {Id} in realm {Realm}", + "SAML bootstrap registration failed for LoginProvider {Id} in realm {Realm}", config.Id, realm.Slug); } } @@ -54,13 +54,13 @@ public async Task StartAsync(CancellationToken cancellationToken) if (enabled.Count > 0) { logger.LogDebug( - "Auth: SamlSchemeBootstrap registered {Count} providers in realm {Realm}", + "SamlSchemeBootstrap registered {Count} providers in realm {Realm}", enabled.Count, realm.Slug); } } logger.LogInformation( - "Auth: SamlSchemeBootstrap registered {Count} SAML providers across {Realms} realm(s)", + "SamlSchemeBootstrap registered {Count} SAML providers across {Realms} realm(s)", 
totalRegistered, realms.Count); } diff --git a/src/dotnet/Modgud.Authentication/Audit/AuthAuditView.cs b/src/dotnet/Modgud.Authentication/Audit/AuthAuditView.cs new file mode 100644 index 00000000..5e3629c0 --- /dev/null +++ b/src/dotnet/Modgud.Authentication/Audit/AuthAuditView.cs @@ -0,0 +1,65 @@ +using Marten.Schema; +using Modgud.Infrastructure.Audit; + +namespace Modgud.Authentication.Audit; + +/// +/// Flat, per-event tenant audit row — one document per audited event, projected +/// from the user- and config-aggregate streams by . +/// This is the derived read model for the GDPR-audit (it replaces the personal-data +/// portion of the old flat AuthLogDocument); durability + GDPR masking are +/// inherited from the source events, so the view is freely rebuildable. +/// +/// Metadata only — no payloads. The row records who/when/what-kind/realm, +/// never the changed values. Personal data stays on the source streams (masked on +/// erase); a permanent-erase deletes a user's rows here (Phase-2 scrub — +/// DeleteWhere<AuthAuditView>(x => x.UserId == userId)). +/// +/// Lives per-realm in each tenant DB (physical isolation — a realm cannot read +/// another realm's audit). is carried for the control-plane +/// cross-realm fan-out and parity with the legacy log; it is the event's tenant id. +/// +[DocumentAlias("auth_audit_view")] +public record AuthAuditView +{ + /// The Marten event id — one audit row per event occurrence. + public Guid Id { get; init; } + + public DateTimeOffset Timestamp { get; init; } + + /// Realm slug the event was emitted in (the event's tenant id). + public string? Realm { get; init; } + + /// code. + public string Category { get; init; } = ""; + + /// code. + public string EventType { get; init; } = ""; + + /// The data subject, for user-stream events (= the user stream id). + /// Null for config-aggregate events. + public Guid? UserId { get; init; } + + /// The config aggregate (e.g. login-provider) id, for config-stream + /// events. 
Null for user-stream events. + public Guid? TargetId { get; init; } + + /// Denormalised display name. Null in the Phase-0 scaffold — resolved + /// from UserView at read time, or denormalised in a later phase. + public string? UserName { get; init; } + + /// Source IP where the event carries one (e.g. a login). PII — inherits + /// the source event's GDPR masking, and the row is deleted on permanent erase. + public string? Ip { get; init; } + + /// Login method code for login events ("password" | "magic_link" | + /// "external" | …), null otherwise. Non-PII — a method switch is a security signal. + public string? Method { get; init; } + + /// Aggregate count for summary events (e.g. the failed-attempt count on + /// an auth.login_failures_observed row). Null for single-occurrence rows. + public int? Count { get; init; } + + /// "Info" | "Warning" | "Error" — preserves the legacy level mapping. + public string Level { get; init; } = "Info"; +} diff --git a/src/dotnet/Modgud.Authentication/Audit/AuthAuditViewProjection.cs b/src/dotnet/Modgud.Authentication/Audit/AuthAuditViewProjection.cs new file mode 100644 index 00000000..8f5a3b90 --- /dev/null +++ b/src/dotnet/Modgud.Authentication/Audit/AuthAuditViewProjection.cs @@ -0,0 +1,131 @@ +using JasperFx.Events; +using Marten.Events.Projections; +using Modgud.Authentication.Domain.ExternalAuth.Events; +using Modgud.Authentication.Domain.LoginProviders.Events; +using Modgud.Authentication.Events; +using Modgud.Domain.Users.Events; +using Modgud.Infrastructure.Audit; + +namespace Modgud.Authentication.Audit; + +/// +/// Async that folds the user- and config-aggregate +/// streams into the flat read model — one row per +/// audited event. This is deliberately an EventProjection, not a +/// Single/MultiStream aggregation: an audit trail is a list of occurrences, +/// not a per-aggregate snapshot. (See dev-docs/future-features/logging-audit-redesign.md §A.3.) 
+/// +/// Metadata comes from the envelope: Id keys +/// the row, Timestamp is the occurrence time, TenantId is the realm +/// slug, and for user-stream events StreamId is the subject user id. No PII +/// payload is copied into the view — see . +/// +/// partial because Marten 9's source generator emits the event +/// dispatcher into the class — see dev-docs/engineering-gotchas/marten-raise-side-effects.md. +/// +/// SCOPE (Phase 0): user-aggregate auth/lifecycle events + the login-provider +/// config family. OAuth application/scope/API config events are the next mechanical +/// addition (same pattern: one Create(IEvent<T>) per type). +/// +public partial class AuthAuditViewProjection : EventProjection +{ + public AuthAuditViewProjection() + { + // A GDPR-erased user is masked, not deleted: their events are masked in + // place and then ARCHIVED (kept, hidden from active queries). Include + // archived events so a full rebuild regenerates the erased user's rows + // FROM the masked events (Ip already null) — the masked archived events are + // the durable, de-identified audit record, so no separate store is needed. + // See dev-docs/future-features/logging-audit-redesign.md §A.4.2. + IncludeArchivedEvents = true; + } + + private static AuthAuditView Row( + IEvent e, + string category, + string eventType, + Guid? userId = null, + Guid? targetId = null, + string? ip = null, + string? method = null, + int? 
count = null, + string level = "Info") => + new() + { + Id = e.Id, + Timestamp = e.Timestamp, + Realm = e.TenantId, + Category = category, + EventType = eventType, + UserId = userId, + TargetId = targetId, + Ip = ip, + Method = method, + Count = count, + Level = level, + }; + + // ── Authentication (user stream — StreamId == userId) ──────────── + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.Authentication, AuditEvents.LoginSucceeded, userId: e.StreamId, ip: e.Data.IpAddress, method: e.Data.Method); + + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.Authentication, AuditEvents.LoginFailed, userId: e.StreamId, ip: e.Data.IpAddress, level: "Warning"); + + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.Authentication, AuditEvents.LoginFailuresObserved, userId: e.StreamId, count: e.Data.FailedCount, level: "Warning"); + + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.Authentication, AuditEvents.AccountLockedOut, userId: e.StreamId, level: "Warning"); + + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.Authentication, AuditEvents.AccountUnlocked, userId: e.StreamId); + + // ── Account lifecycle (user stream) ────────────────────────────── + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.Account, AuditEvents.AccountCreated, userId: e.StreamId); + + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.Account, AuditEvents.AccountDeleted, userId: e.StreamId, level: "Warning"); + + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.Account, AuditEvents.AccountProfileUpdated, userId: e.StreamId); + + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.Account, AuditEvents.AccountUserNameChanged, userId: e.StreamId); + + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.Account, AuditEvents.AccountPasswordChanged, userId: e.StreamId); + + public AuthAuditView Create(IEvent e) => + Row(e, 
AuditCategories.Account, AuditEvents.AccountActivated, userId: e.StreamId); + + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.Account, AuditEvents.AccountDeactivated, userId: e.StreamId, level: "Warning"); + + // ── Federation (user-stream mirror events) ─────────────────────── + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.Federation, AuditEvents.IdentityLinked, userId: e.StreamId); + + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.Federation, AuditEvents.IdentityUnlinked, userId: e.StreamId); + + // ── Admin / realm config (login-provider stream — StreamId == provider id) ── + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.AdminRealm, AuditEvents.LoginProviderAdded, targetId: e.StreamId); + + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.AdminRealm, AuditEvents.LoginProviderUpdated, targetId: e.StreamId); + + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.AdminRealm, AuditEvents.LoginProviderEnabled, targetId: e.StreamId); + + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.AdminRealm, AuditEvents.LoginProviderDisabled, targetId: e.StreamId); + + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.AdminRealm, AuditEvents.LoginProviderSecretRotated, targetId: e.StreamId, level: "Warning"); + + public AuthAuditView Create(IEvent e) => + Row(e, AuditCategories.AdminRealm, AuditEvents.LoginProviderDeleted, targetId: e.StreamId, level: "Warning"); +} diff --git a/src/dotnet/Modgud.Authentication/AuthLog/AuthLogDocument.cs b/src/dotnet/Modgud.Authentication/AuthLog/AuthLogDocument.cs deleted file mode 100644 index 7c7e02c3..00000000 --- a/src/dotnet/Modgud.Authentication/AuthLog/AuthLogDocument.cs +++ /dev/null @@ -1,52 +0,0 @@ -namespace Modgud.Authentication.AuthLog; - -/// -/// Marten document for persisted auth log entries. 
-/// -/// Retention & GDPR (LOG-02 / security-hardening tracker): -/// -/// Retention window: 7 days. The -/// AuthLogPersistenceService background worker prunes records -/// older than 7 days on each iteration. After that, the entry is gone -/// from the database — recovery from older logs requires a Postgres -/// backup, which is operationally separate. -/// What's persisted: timestamp, log level, -/// message text, optional user-name (if the calling principal had one), -/// optional source IP. Specifically NOT persisted: cookies, tokens, -/// secrets, full request/response bodies, password values. Auth -/// endpoints log {UserName} + {IP} at Information level -/// for security-event traceability — this is the canonical "who tried -/// what from where" log under the GDPR legitimate-interest basis for -/// detecting and responding to credential abuse. -/// Access control: read access via the -/// modgud:auth-log:read permission, which the seeded -/// help-desk role carries. No public-network exposure of the -/// raw documents. -/// Erasure obligations: when a user invokes -/// GDPR-erasure, their personal references in UserName are -/// masked at the ArchiveStream layer; the entry stays as the -/// security-audit record but is no longer linkable to the individual. -/// Source-IP is treated as personal data and falls under the same -/// 7-day retention window as the rest. -/// -/// -public class AuthLogDocument -{ - public Guid Id { get; init; } = Guid.NewGuid(); - public DateTimeOffset Timestamp { get; init; } - public string Level { get; init; } = "Info"; - public string Message { get; init; } = ""; - public string? UserName { get; init; } - public string? Ip { get; init; } - - /// - /// The realm slug the event was emitted in (captured from the ambient - /// TenantContext by at log time; - /// background / no-tenant work is attributed to system). 
All entries - /// live in the system DB; this column is what scopes the admin read so a - /// tenant realm-admin sees only their own realm's events while the - /// control-plane realm sees the full cross-realm log. Null only on legacy - /// rows written before this column existed. - /// - public string? Realm { get; init; } -} diff --git a/src/dotnet/Modgud.Authentication/AuthLog/AuthLogService.cs b/src/dotnet/Modgud.Authentication/AuthLog/AuthLogService.cs deleted file mode 100644 index 29938922..00000000 --- a/src/dotnet/Modgud.Authentication/AuthLog/AuthLogService.cs +++ /dev/null @@ -1,129 +0,0 @@ -using System.Threading.Channels; -using Marten; -using Serilog.Core; -using Serilog.Events; -using Modgud.Authentication.AuthLog; - -namespace Modgud.Authentication.AuthLog; - -/// -/// Serilog sink that captures "Auth:" prefixed log entries -/// and forwards them to a channel for async DB persistence. -/// -public class AuthLogSink : ILogEventSink -{ - private readonly Channel _channel = Channel.CreateUnbounded(); - - public ChannelReader Reader => _channel.Reader; - - public void Emit(LogEvent logEvent) - { - if (!logEvent.MessageTemplate.Text.StartsWith("Auth:")) return; - - string? userName = null; - string? ip = null; - string? realm = null; - - if (logEvent.Properties.TryGetValue("UserName", out var userProp)) - userName = userProp.ToString().Trim('"'); - if (logEvent.Properties.TryGetValue("IP", out var ipProp)) - ip = ipProp.ToString().Trim('"'); - // Realm is stamped by RealmLogEnricher from the ambient TenantContext at - // emit time (the sink itself runs tenant-less in a BackgroundService). 
- if (logEvent.Properties.TryGetValue("Realm", out var realmProp)) - realm = realmProp.ToString().Trim('"'); - - // Render the message with placeholder values substituted in - // (`User={UserName}` → `User=admin`) instead of persisting the raw - // template — otherwise the audit-log message column reads as - // `Login successful User={UserName}` and an admin can't tell who - // logged in without joining columns mentally. UserName and IP are - // already extracted into their own columns above; we still leave - // them in the rendered message because the rest of the message - // can reference them by name (e.g. `Login failed for {UserName} - // from {IP} — wrong password`). - var rendered = logEvent.RenderMessage(); - var message = rendered.StartsWith("Auth: ") ? rendered["Auth: ".Length..] : rendered; - message = message.TrimEnd('.').Trim(); - - _channel.Writer.TryWrite(new AuthLogDocument - { - Timestamp = logEvent.Timestamp, - Level = logEvent.Level switch - { - LogEventLevel.Warning => "Warning", - LogEventLevel.Error or LogEventLevel.Fatal => "Error", - _ => "Info", - }, - Message = message, - UserName = userName, - Ip = ip, - Realm = realm, - }); - } -} - -/// -/// Background service that drains auth log entries from the channel into Marten -/// and periodically cleans up entries older than 7 days. 
-/// -public class AuthLogPersistenceService(IServiceProvider services, AuthLogSink sink) : BackgroundService -{ - private static readonly TimeSpan RetentionPeriod = TimeSpan.FromDays(7); - private static readonly TimeSpan CleanupInterval = TimeSpan.FromHours(1); - - protected override async Task ExecuteAsync(CancellationToken stoppingToken) - { - _ = CleanupLoop(stoppingToken); - - await foreach (var entry in sink.Reader.ReadAllAsync(stoppingToken)) - { - try - { - using var scope = services.CreateScope(); - // AuthLog runs out-of-band in a HostedService — there is no - // HttpContext to drive tenant resolution, so target the master - // ("system") tenant explicitly. AuthLog documents live in the - // master DB by design (cross-tenant audit log). - await using var session = scope.ServiceProvider - .GetRequiredService() - .LightweightSession("system"); - - session.Store(entry); - await session.SaveChangesAsync(stoppingToken); - } - catch (Exception ex) when (ex is not OperationCanceledException) - { - Serilog.Log.Error(ex, "Failed to persist auth log entry"); - } - } - } - - private async Task CleanupLoop(CancellationToken stoppingToken) - { - while (!stoppingToken.IsCancellationRequested) - { - try - { - using var scope = services.CreateScope(); - // AuthLog runs out-of-band in a HostedService — there is no - // HttpContext to drive tenant resolution, so target the master - // ("system") tenant explicitly. AuthLog documents live in the - // master DB by design (cross-tenant audit log). 
- await using var session = scope.ServiceProvider - .GetRequiredService() - .LightweightSession("system"); - - var cutoff = DateTimeOffset.UtcNow - RetentionPeriod; - session.DeleteWhere(x => x.Timestamp < cutoff); - await session.SaveChangesAsync(stoppingToken); - } - catch (Exception ex) when (ex is not OperationCanceledException) - { - Serilog.Log.Error(ex, "Failed to cleanup old auth log entries"); - } - - await Task.Delay(CleanupInterval, stoppingToken); - } - } -} diff --git a/src/dotnet/Modgud.Authentication/AuthLog/ErrorFeedSink.cs b/src/dotnet/Modgud.Authentication/AuthLog/ErrorFeedSink.cs new file mode 100644 index 00000000..69b9f341 --- /dev/null +++ b/src/dotnet/Modgud.Authentication/AuthLog/ErrorFeedSink.cs @@ -0,0 +1,97 @@ +using System.Diagnostics; +using Modgud.Infrastructure.Observability; +using Serilog.Core; +using Serilog.Events; + +namespace Modgud.Authentication.AuthLog; + +/// +/// Serilog sink that feeds the in-app per-realm live error feed +/// (logging/audit redesign Phase 5, §B.3). Captures qualifying log events into +/// the process-local , which the +/// ObservabilityHub.LogsSubscribe stream and the +/// /api/admin/observability/errors snapshot read per realm. +/// +/// Scope (Open Decision #7 — operator choice): by default only +/// Error+ events from Modgud.* loggers are captured — the quiet +/// "an application error happened on my realm" feed. Framework loggers +/// (Marten / Npgsql / Wolverine / Microsoft / System) are excluded, so +/// infrastructure failures surface in Console / File / OpenObserve but not in +/// this in-app panel. Both the level floor and the source prefix are +/// configurable (Observability__ErrorFeed__MinimumLevel / +/// __SourcePrefix) so this can be widened without a code change. +/// +/// The realm tag comes from the -stamped +/// Realm property (falls back to system). 
Records are rendered +/// and length-capped here so the buffer keeps only display-safe strings and a +/// bounded footprint — no live / exception graph is +/// retained. +/// +/// Best-effort: a capture failure must never break logging, so +/// swallows. This feed does NOT pass through the OTel +/// collector redaction — the call-site PII belt + per-realm read scoping are +/// the controls (mirrors the streamless security store; see §B.3). +/// +public sealed class ErrorFeedSink : ILogEventSink +{ + private const int MaxMessageLength = 1000; + private const int MaxExceptionLength = 1000; + + private readonly RealmErrorBuffer _buffer; + private readonly LogEventLevel _minimumLevel; + private readonly string _sourcePrefix; + + public ErrorFeedSink(RealmErrorBuffer buffer, LogEventLevel minimumLevel, string sourcePrefix) + { + _buffer = buffer; + _minimumLevel = minimumLevel; + _sourcePrefix = sourcePrefix; + } + + public void Emit(LogEvent logEvent) + { + try + { + if (logEvent.Level < _minimumLevel) return; + + // Source filter: only loggers under the configured prefix. A log + // with no SourceContext (e.g. a static Log.Error) is excluded. + var sourceContext = ReadScalarString(logEvent, "SourceContext"); + if (sourceContext is null || + !sourceContext.StartsWith(_sourcePrefix, StringComparison.Ordinal)) + return; + + var realm = ReadScalarString(logEvent, "Realm") ?? "system"; + var message = Truncate(logEvent.RenderMessage(), MaxMessageLength); + var exception = logEvent.Exception is { } ex + ? Truncate($"{ex.GetType().Name}: {ex.Message}", MaxExceptionLength) + : null; + + // Trace correlation with OpenObserve (the OTLP sink reads the same + // ambient Activity). Present only when a trace is in flight. 
+ var traceId = Activity.Current?.TraceId.ToString(); + + _buffer.Record(new ErrorLogEntry( + logEvent.Timestamp, + realm, + logEvent.Level.ToString(), + message, + exception, + sourceContext, + traceId)); + } + catch + { + // Never let the live-feed capture break the logging pipeline. + } + } + + private static string? ReadScalarString(LogEvent logEvent, string name) + => logEvent.Properties.TryGetValue(name, out var value) + && value is ScalarValue { Value: string s } + ? s + : null; + + private static string Truncate(string value, int max) + => value.Length <= max ? value : value[..max] + "…"; +} diff --git a/src/dotnet/Modgud.Authentication/AuthLog/RealmLogEnricher.cs b/src/dotnet/Modgud.Authentication/AuthLog/RealmLogEnricher.cs index 5861cc8b..4b51bef4 100644 --- a/src/dotnet/Modgud.Authentication/AuthLog/RealmLogEnricher.cs +++ b/src/dotnet/Modgud.Authentication/AuthLog/RealmLogEnricher.cs @@ -8,12 +8,16 @@ namespace Modgud.Authentication.AuthLog; /// Serilog enricher that stamps every log event with the ambient realm slug /// () as a Realm property. /// -/// The enrichment happens synchronously at emit time, on the request -/// thread where the tenant is set in AsyncLocal — so the realm travels WITH the -/// to the sink. This is essential because -/// AuthLogPersistenceService drains out-of-band -/// in a BackgroundService that has no TenantContext; the attribution must -/// be captured here, not at persist time. +/// Kept after the "Auth:" audit sink was retired (logging/audit +/// redesign Phase 3): the tenant audit no longer flows through Serilog, but the +/// realm tag is still how OPERATIONAL logs are attributed — Console/File today, and +/// the Phase-4 OTel Logs export tomorrow. The streamless security store captures its +/// own realm directly from TenantContext at emit, independent of this. 
+/// +/// The enrichment happens synchronously at emit time, on the request thread +/// where the tenant is set in AsyncLocal, so the realm travels WITH the +/// to every sink — including ones that run out-of-band with +/// no TenantContext. /// /// Uses (which falls back to the /// system tenant) rather than the nullable variant, so background / no- @@ -21,15 +25,10 @@ namespace Modgud.Authentication.AuthLog; /// instead of being orphaned with no realm at all. /// /// Attribution is dual-sourced. -/// does not overwrite — so a log call whose Auth: message template binds -/// its own {Realm} placeholder wins, and the enricher's ambient value is -/// the fallback for the (majority) of sites that don't. This is intentional: the -/// realm-iterating background jobs (e.g. the signing-key janitor / DCR GC) run in -/// a single system session and bind the iterated realm.Slug in -/// their template, which is the CORRECT per-realm attribution that the ambient -/// system fallback could not give. Convention: any {Realm} -/// bound in an Auth: template MUST be the realm the event pertains to — -/// never another realm — because that value scopes who sees the row. +/// does not overwrite — so a log call that binds its own {Realm} placeholder +/// wins, and the enricher's ambient value is the fallback for sites that don't. A +/// realm-iterating background job running in a single system session can thus +/// bind the iterated realm.Slug for correct per-realm attribution. /// public sealed class RealmLogEnricher : ILogEventEnricher { diff --git a/src/dotnet/Modgud.Authentication/Events/IdentityEvents.cs b/src/dotnet/Modgud.Authentication/Events/IdentityEvents.cs index 9331feb2..11643dcb 100644 --- a/src/dotnet/Modgud.Authentication/Events/IdentityEvents.cs +++ b/src/dotnet/Modgud.Authentication/Events/IdentityEvents.cs @@ -41,12 +41,29 @@ public record UserPasswordChangedEvent( public record UserLoggedInEvent( Guid UserId, - string? IpAddress); + string? 
IpAddress, + // Non-PII login-method code (ModgudMeters.LoginMethod.* — "password", + // "magic_link", "external", …). Trailing optional so old event streams and + // existing construction sites default to null ("not recorded"). A method + // switch / first login via a new provider is itself a security signal, so + // the audit view surfaces it. + string? Method = null); public record UserLoginFailedEvent( Guid UserId, string? IpAddress); +// Aggregated known-user login-failure record (audit redesign Decision (b)): ONE +// event per resolved failure streak — emitted when the access-failed counter +// resets to 0 (successful sign-in / unlock) — NOT one per attempt. Avoids stream +// spam and the amplification vector (an attacker spraying a victim can't inflate +// that victim's stream per attempt). No PII (count + timestamp); lives on the +// user stream, so it erases with the subject. +public record UserLoginFailuresObservedEvent( + Guid UserId, + int FailedCount, + DateTimeOffset ObservedAt); + public record UserLockedOutEvent( Guid UserId, DateTimeOffset LockoutEnd); diff --git a/src/dotnet/Modgud.Authentication/Gdpr/GdprService.cs b/src/dotnet/Modgud.Authentication/Gdpr/GdprService.cs index def5b137..b1cad0e0 100644 --- a/src/dotnet/Modgud.Authentication/Gdpr/GdprService.cs +++ b/src/dotnet/Modgud.Authentication/Gdpr/GdprService.cs @@ -335,6 +335,24 @@ await store.Advanced.ApplyEventDataMasking(x => archiveSession.Events.ArchiveStream(linkId); await archiveSession.SaveChangesAsync(ct); + // 5) Audit trail — mask-and-keep, NOT delete. The tenant audit is retained + // de-identified (Art-17(3)): the source events are now masked + archived, + // and AuthAuditViewProjection.IncludeArchivedEvents makes a rebuild + // regenerate these rows from the masked events. 
But masking appends no new + // event, so the LIVE (already-projected) rows still hold the pre-mask IP — + // null it here so the live view is immediately de-identified and identical + // to what an archived-inclusive rebuild produces. (Ip is the only PII + // column today; UserName is null, UserId is a pseudonymous tombstone key.) + await using (var auditSession = store.LightweightSession(tenantId)) + { + var auditRows = await auditSession.Query() + .Where(r => r.UserId == userId && r.Ip != null) + .ToListAsync(ct); + foreach (var row in auditRows) + auditSession.Store(row with { Ip = null }); + await auditSession.SaveChangesAsync(ct); + } + ModgudMeters.RecordGdprRequest(ModgudMeters.GdprRequestType.Mask); return true; diff --git a/src/dotnet/Modgud.Authentication/Identity/EventSourcedUserStore.cs b/src/dotnet/Modgud.Authentication/Identity/EventSourcedUserStore.cs index 0d311d2f..1d11929b 100644 --- a/src/dotnet/Modgud.Authentication/Identity/EventSourcedUserStore.cs +++ b/src/dotnet/Modgud.Authentication/Identity/EventSourcedUserStore.cs @@ -377,6 +377,17 @@ private void AppendSecurityChangeEvents(ApplicationUser user, UserSecurityData s } } + // A failure streak just resolved — the access-failed counter went from >0 + // back to 0 (a successful sign-in or an unlock reset it). Record it as ONE + // aggregated audit event (Decision (b)), not one per attempt: no stream spam, + // and an attacker spraying a victim can't inflate that victim's stream. No IP + // (the aggregate has no single source); erasable with the user's stream. 
+ if (securityData.AccessFailedCount > 0 && user.AccessFailedCount == 0) + { + events.Add(new UserLoginFailuresObservedEvent( + user.Id, securityData.AccessFailedCount, DateTimeOffset.UtcNow)); + } + if (events.Count > 0) { session.Events.Append(user.Id, events.ToArray()); diff --git a/src/dotnet/Modgud.Authentication/Identity/ExternalAuth/UserUpdateScriptRunner.cs b/src/dotnet/Modgud.Authentication/Identity/ExternalAuth/UserUpdateScriptRunner.cs index eab7e1aa..a4ddaad0 100644 --- a/src/dotnet/Modgud.Authentication/Identity/ExternalAuth/UserUpdateScriptRunner.cs +++ b/src/dotnet/Modgud.Authentication/Identity/ExternalAuth/UserUpdateScriptRunner.cs @@ -44,7 +44,7 @@ public UserUpdateResult Run(string script, IReadOnlyDictionary { if (string.IsNullOrWhiteSpace(script)) { - _logger.LogWarning("Auth: UserUpdateScript called with empty script — no patch produced"); + _logger.LogWarning("UserUpdateScript called with empty script — no patch produced"); return UserUpdateResult.Failed("script is empty"); } @@ -66,17 +66,17 @@ public UserUpdateResult Run(string script, IReadOnlyDictionary } catch (JavaScriptException jsEx) { - _logger.LogWarning(jsEx, "Auth: UserUpdateScript error: {Message}", jsEx.Message); + _logger.LogWarning(jsEx, "UserUpdateScript error: {Message}", jsEx.Message); return UserUpdateResult.Failed(jsEx.Message); } catch (TimeoutException) { - _logger.LogWarning("Auth: UserUpdateScript timed out after {Ms}ms", ScriptTimeout.TotalMilliseconds); + _logger.LogWarning("UserUpdateScript timed out after {Ms}ms", ScriptTimeout.TotalMilliseconds); return UserUpdateResult.Failed("script timed out"); } catch (Exception ex) { - _logger.LogError(ex, "Auth: UserUpdateScript unexpected error"); + _logger.LogError(ex, "UserUpdateScript unexpected error"); return UserUpdateResult.Failed(ex.Message); } } diff --git a/src/dotnet/Modgud.Authentication/Identity/LogPiiMasking.cs b/src/dotnet/Modgud.Authentication/Identity/LogPiiMasking.cs index ae33e0bf..a7ab417b 100644 --- 
a/src/dotnet/Modgud.Authentication/Identity/LogPiiMasking.cs +++ b/src/dotnet/Modgud.Authentication/Identity/LogPiiMasking.cs @@ -33,4 +33,23 @@ public static string MaskEmail(string? email) var firstChar = local[0]; return $"{firstChar}***@{domain}"; } + + /// + /// Mask a login identifier for log/audit lines about an unidentified + /// actor — e.g. a failed login for a user that does not exist, where the + /// attempted handle is attacker-supplied and may itself be a real person's + /// email or username. Email-shaped input is masked via ; + /// anything else keeps only its first character. Empty/null returns a neutral + /// placeholder. + /// + /// For an identified user, do NOT mask — log user.Id + /// (a GUID that erasure tombstones) instead of the username; that keeps the + /// log PII-free without losing the ability to correlate. + /// + public static string MaskUsername(string? identifier) + { + if (string.IsNullOrWhiteSpace(identifier)) return "(none)"; + var s = identifier.Trim(); + return s.Contains('@') ? 
MaskEmail(s) : $"{s[0]}***"; + } } diff --git a/src/dotnet/Modgud.Authentication/Identity/LoginProviders/Saml/SamlMetadataFetcher.cs b/src/dotnet/Modgud.Authentication/Identity/LoginProviders/Saml/SamlMetadataFetcher.cs index 70f1bc29..80db9286 100644 --- a/src/dotnet/Modgud.Authentication/Identity/LoginProviders/Saml/SamlMetadataFetcher.cs +++ b/src/dotnet/Modgud.Authentication/Identity/LoginProviders/Saml/SamlMetadataFetcher.cs @@ -59,7 +59,7 @@ public SamlMetadataFetcher(IHttpClientFactory httpFactory, ILogger _logger; public SamlSpCertificateService( IDocumentSession session, SamlSpCertificateStore store, TimeProvider clock, + ISecurityAuditLog securityAudit, ILogger logger) { _session = session; _store = store; _clock = clock; + _securityAudit = securityAudit; _logger = logger; } @@ -210,9 +214,15 @@ public async Task RotateAsync(CancellationToken ct = default) _session.Store(doc); await _session.SaveChangesAsync(ct); - _logger.LogInformation( - "Auth: Rotated SAML SP cert for realm {Realm} — new thumbprint {Thumbprint}, valid until {NotAfter:o}", - realmSlug, doc.ActiveCertThumbprint, doc.ActiveCertNotAfter); + _securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.SamlCertRotated, + Realm = realmSlug, + Level = "Info", + Status = "rotated", + Reason = $"thumbprint {doc.ActiveCertThumbprint}, notAfter {doc.ActiveCertNotAfter:o}", + Message = $"Rotated SAML SP cert — new thumbprint {doc.ActiveCertThumbprint}, valid until {doc.ActiveCertNotAfter:o}", + }); return newCert; } @@ -240,7 +250,7 @@ public async Task RetireExpiredPreviousAsync(CancellationToken ct = defaul await _session.SaveChangesAsync(ct); _logger.LogInformation( - "Auth: Retired previous SAML SP cert (thumbprint {Thumbprint})", oldThumb); + "Retired previous SAML SP cert (thumbprint {Thumbprint})", oldThumb); return true; } @@ -276,9 +286,15 @@ private async Task LoadOrCreateAsync(CancellationToke _session.Store(doc); await _session.SaveChangesAsync(ct); - 
_logger.LogInformation( - "Auth: Generated initial SAML SP cert for realm {Realm} — thumbprint {Thumbprint}, valid until {NotAfter:o}", - realmSlug, doc.ActiveCertThumbprint, doc.ActiveCertNotAfter); + _securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.SamlCertRotated, + Realm = realmSlug, + Level = "Info", + Status = "generated", + Reason = $"initial cert, thumbprint {doc.ActiveCertThumbprint}", + Message = $"Generated initial SAML SP cert — thumbprint {doc.ActiveCertThumbprint}, valid until {doc.ActiveCertNotAfter:o}", + }); return doc; } diff --git a/src/dotnet/Modgud.Authentication/RealmSettings/RealmSettingsService.cs b/src/dotnet/Modgud.Authentication/RealmSettings/RealmSettingsService.cs index 1fc812df..b6b1df57 100644 --- a/src/dotnet/Modgud.Authentication/RealmSettings/RealmSettingsService.cs +++ b/src/dotnet/Modgud.Authentication/RealmSettings/RealmSettingsService.cs @@ -77,6 +77,13 @@ public async Task> PatchAsync(UpdateRealmSettingsDto d doc.Deletion = deletion.Value; } + if (dto.Audit is not null) + { + var audit = ApplyAuditPatch(doc.Audit, dto.Audit); + if (audit.IsError) return audit.FirstError; + doc.Audit = audit.Value; + } + if (!isCreate) doc.UpdatedAt = DateTimeOffset.UtcNow; session.Store(doc); @@ -116,6 +123,7 @@ private SelfRegistrationSettings ApplySelfRegistrationPatch( Dcr = MapDcrToDto(doc.Dcr), Branding = MapBrandingToDto(doc.Branding), Deletion = MapDeletionToDto(doc.Deletion), + Audit = MapAuditToDto(doc.Audit), Pages = doc.Pages is null ? new Dictionary() : new Dictionary(doc.Pages), @@ -255,6 +263,22 @@ internal static DeletionSettingsDto MapDeletionToDto(DeletionSettings? s) }; } + private static ErrorOr ApplyAuditPatch(AuditSettings? current, UpdateAuditSettingsDto patch) + { + var s = current ?? new AuditSettings(); + var merged = s with { VisibilityWindowDays = patch.VisibilityWindowDays ?? 
s.VisibilityWindowDays }; + if (merged.VisibilityWindowDays < 1) + return Error.Validation("Audit.InvalidVisibilityWindowDays", + "VisibilityWindowDays must be at least 1."); + return merged; + } + + internal static AuditSettingsDto MapAuditToDto(AuditSettings? s) + { + s ??= AuditSettings.Defaults; + return new AuditSettingsDto { VisibilityWindowDays = s.VisibilityWindowDays }; + } + internal static DcrSettingsDto MapDcrToDto(DcrSettings? s) { if (s is null) return new DcrSettingsDto(); diff --git a/src/dotnet/Modgud.Authentication/Sessions/UserAccessRevoker.cs b/src/dotnet/Modgud.Authentication/Sessions/UserAccessRevoker.cs index 376b1667..46968337 100644 --- a/src/dotnet/Modgud.Authentication/Sessions/UserAccessRevoker.cs +++ b/src/dotnet/Modgud.Authentication/Sessions/UserAccessRevoker.cs @@ -49,11 +49,11 @@ public async Task RevokeAllAccessAsync(Guid userId, AccessRevocationReason reaso await userManager.UpdateSecurityStampAsync(user!); else logger.LogWarning( - "Auth: security-stamp rotation skipped for user {UserId} (reason={Reason}) — no loadable ApplicationUser; existing auth cookies are not force-expired by this revoke (tokens + sessions were still revoked)", + "security-stamp rotation skipped for user {UserId} (reason={Reason}) — no loadable ApplicationUser; existing auth cookies are not force-expired by this revoke (tokens + sessions were still revoked)", userId, reason); logger.LogInformation( - "Auth: revoked access for user {UserId} (reason={Reason}, tokens={TokenCount}, authorizations={AuthorizationCount}, stampRotated={StampRotated})", + "revoked access for user {UserId} (reason={Reason}, tokens={TokenCount}, authorizations={AuthorizationCount}, stampRotated={StampRotated})", userId, reason, tokens, authorizations, stampRotated); } } diff --git a/src/dotnet/Modgud.Authentication/Setup/MartenStoreOptionsExtensions.cs b/src/dotnet/Modgud.Authentication/Setup/MartenStoreOptionsExtensions.cs index fdf57be3..26a68419 100644 --- 
a/src/dotnet/Modgud.Authentication/Setup/MartenStoreOptionsExtensions.cs +++ b/src/dotnet/Modgud.Authentication/Setup/MartenStoreOptionsExtensions.cs @@ -1,6 +1,5 @@ using JasperFx.Events.Projections; using Marten; -using Modgud.Authentication.AuthLog; using Modgud.Authentication.Domain; using Modgud.Authentication.Domain.ExternalAuth; using Modgud.Authentication.Domain.ExternalAuth.Events; @@ -124,15 +123,15 @@ public static StoreOptions UseModgudAuthentication(this StoreOptions options) .Index(x => x.LoginProviderId) .Index(x => x.IsUnlinked); - // AuthLogDocument lives in the default (public) schema like every other - // auth doc — dropped the gratuitous solo "marten" schema (one schema - // fewer per tenant DB; aligns with AppBase v4). - options.Schema.For() + // Streamless security/ops store (logging/audit redesign Track A, Phase 3). + // Cross-realm in the system DB; the typed successor to the personal-data- + // bearing-but-streamless portion of AuthLogDocument. Indexed for the admin + // read (Realm scope + EventType chip filter) and the retention prune. + options.Schema.For() .Identity(x => x.Id) .Index(x => x.Timestamp) - // All realms' entries share the system DB, so the admin read/clear - // filters by Realm — index it so the tenant-scoped path isn't a scan. - .Index(x => x.Realm); + .Index(x => x.Realm) + .Index(x => x.EventType); // Tenant-scoped singleton config doc. One row per tenant DB, // addressed by the fixed `RealmSettings.SingletonId`. 
Owned by @@ -152,6 +151,7 @@ public static StoreOptions UseModgudAuthentication(this StoreOptions options) options.Events.MapEventType("user_password_changed"); options.Events.MapEventType("user_logged_in"); options.Events.MapEventType("user_login_failed"); + options.Events.MapEventType("user_login_failures_observed"); options.Events.MapEventType("user_locked_out"); options.Events.MapEventType("user_unlocked"); options.Events.MapEventType("user_activated"); @@ -210,9 +210,10 @@ public static StoreOptions UseModgudAuthentication(this StoreOptions options) options.Events.AddMaskingRuleForProtectedInformation(e => new UserIdentitySetupEvent(e.UserId, "[DELETED]", e.IsActive)); - // IP addresses are PII under GDPR — strip them from login records. + // IP addresses are PII under GDPR — strip them from login records. The + // login method is non-PII (a bounded code), so it passes through the mask. options.Events.AddMaskingRuleForProtectedInformation(e => - new UserLoggedInEvent(e.UserId, IpAddress: null)); + new UserLoggedInEvent(e.UserId, IpAddress: null, e.Method)); options.Events.AddMaskingRuleForProtectedInformation(e => new UserLoginFailedEvent(e.UserId, IpAddress: null)); @@ -245,6 +246,13 @@ public static StoreOptions UseModgudAuthentication(this StoreOptions options) projection.Add(); }); + // Tenant audit read model — a flat, per-event projection (one row per + // audited event) over the user + config streams; queried per realm by the + // GDPR-audit read surface (Phase 2). Deliberately an EventProjection, not an + // aggregation: an audit log is a list of occurrences, not a per-aggregate + // snapshot. See dev-docs/future-features/logging-audit-redesign.md §A.3. 
+ options.Projections.Add(ProjectionLifecycle.Async); + return options; } } diff --git a/src/dotnet/Modgud.Authentication/Setup/PendingAdminInviteService.cs b/src/dotnet/Modgud.Authentication/Setup/PendingAdminInviteService.cs index 7a034cae..d814fef4 100644 --- a/src/dotnet/Modgud.Authentication/Setup/PendingAdminInviteService.cs +++ b/src/dotnet/Modgud.Authentication/Setup/PendingAdminInviteService.cs @@ -4,6 +4,7 @@ using Modgud.Authentication.Domain; using Modgud.Authentication.Identity; using Modgud.Domain.Realms; +using Modgud.Infrastructure.Audit; using Modgud.Infrastructure.Email; using Modgud.Infrastructure.Persistence.Tenancy; using ErrorOr; @@ -82,6 +83,7 @@ public sealed class PendingAdminInviteService( IRealmAdminBootstrapper bootstrapper, IEmailService emailService, IWebHostEnvironment env, + ISecurityAuditLog securityAudit, ILogger logger) : IPendingAdminInviteService { public async Task IssueAsync( @@ -157,13 +159,19 @@ await emailService.SendTemplatedEmailAsync( catch (Exception ex) { logger.LogWarning(ex, - "Auth: Bootstrap-invite issued but email delivery failed. Realm={Realm} Email={MaskedEmail}. The plaintext URL is still on the issuer's side.", + "Bootstrap-invite issued but email delivery failed. Realm={Realm} Email={MaskedEmail}. The plaintext URL is still on the issuer's side.", realm.Slug, LogPiiMasking.MaskEmail(normalizedEmail)); } - logger.LogInformation( - "Auth: Bootstrap-invite issued. Realm={Realm} UserName={UserName} Email={MaskedEmail} ExpiresAt={ExpiresAt} IssuedBy={IssuedBy}", - realm.Slug, normalizedUserName, LogPiiMasking.MaskEmail(normalizedEmail), invite.ExpiresAt, issuedBy ?? "(self/CLI)"); + securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.BootstrapInviteIssued, + Level = "Info", + Actor = LogPiiMasking.MaskEmail(normalizedEmail), + Status = "issued", + Reason = $"expires {invite.ExpiresAt}, issued by {issuedBy ?? 
"(self/CLI)"}", + Message = "Bootstrap invite issued", + }); return new IssuedInvite(invite.Id, token, url, invite.ExpiresAt, normalizedEmail, normalizedUserName); } @@ -205,8 +213,8 @@ public async Task> ConsumeAsync( await session.SaveChangesAsync(ct); logger.LogInformation( - "Auth: Bootstrap-invite consumed. UserName={UserName} Email={Email}", - invite.UserName, invite.Email); + "Bootstrap-invite consumed. UserId={UserId} Email={MaskedEmail}", + bootstrapResult.Value.UserId, LogPiiMasking.MaskEmail(invite.Email)); return bootstrapResult.Value; } diff --git a/src/dotnet/Modgud.Authentication/Setup/RealmAdminBootstrapper.cs b/src/dotnet/Modgud.Authentication/Setup/RealmAdminBootstrapper.cs index ec599a8b..21b35bad 100644 --- a/src/dotnet/Modgud.Authentication/Setup/RealmAdminBootstrapper.cs +++ b/src/dotnet/Modgud.Authentication/Setup/RealmAdminBootstrapper.cs @@ -170,6 +170,7 @@ Guid CatalogId(string resource, string action) CatalogId("authorization-group", "read"), CatalogId("permission-role", "read"), CatalogId("auth-log", "read"), + CatalogId("audit-log", "read"), }; var userManagerRole = new PermissionRole { diff --git a/src/dotnet/Modgud.Authorization/README.md b/src/dotnet/Modgud.Authorization/README.md index 07d0a6a1..84183bc4 100644 --- a/src/dotnet/Modgud.Authorization/README.md +++ b/src/dotnet/Modgud.Authorization/README.md @@ -247,8 +247,9 @@ Treat that as inspiration, not a drop-in. links, OIDC. Lives in the `Modgud.Authentication` slice. - **User profile management** — display fields, change-requests, profile self-service. Lives in `Modgud.Api/Features/Account` + `Admin`. -- **Auth log / audit** — `AuthLogDocument` + `AuthLogSink` are - Modgud-internal, not part of the slice. +- **Auth log / audit** — the `AuthAuditView` projection (GDPR-audit) and the + `SecurityAuditEntry` streamless security store are Modgud-internal, not part + of the slice. - **Frontend** — see Step 6 above. 
The split is intentional: this slice owns "**who has what permission, who's diff --git a/src/dotnet/Modgud.Authorization/Services/LoginTimeMembershipDeriver.cs b/src/dotnet/Modgud.Authorization/Services/LoginTimeMembershipDeriver.cs index f4d8aee4..49716cbc 100644 --- a/src/dotnet/Modgud.Authorization/Services/LoginTimeMembershipDeriver.cs +++ b/src/dotnet/Modgud.Authorization/Services/LoginTimeMembershipDeriver.cs @@ -37,7 +37,9 @@ Task DeriveAsync( CancellationToken ct = default); } -public sealed record DerivedMembershipResult(IReadOnlyList MatchedGroupIds) +public sealed record DerivedMembershipResult( + IReadOnlyList MatchedGroupIds, + int DroppedRealmAdminCount = 0) { public static readonly DerivedMembershipResult Empty = new([]); } @@ -96,29 +98,34 @@ public async Task DeriveAsync( // write-time config guard should already forbid an ExternallyDrivable // group from conferring realm:admin — defensively drop any that slipped // through (e.g. a role flipped IsRealmAdmin after the group was marked). - var safe = await StripRealmAdminConferringAsync(matched, ct); - return new DerivedMembershipResult([.. safe.Select(g => g.Id)]); + var (safe, dropped) = await StripRealmAdminConferringAsync(matched, ct); + return new DerivedMembershipResult([.. safe.Select(g => g.Id)], dropped); } - private async Task> StripRealmAdminConferringAsync( + // Returns the surviving groups plus the count of realm:admin-conferring groups + // that were defensively dropped. The caller (which can reach the audit store — + // this Authorization layer cannot) turns a non-zero count into a + // security.privilege_escalation_blocked audit record; here it stays a plain + // diagnostic log. 
+ private async Task<(IReadOnlyList Safe, int Dropped)> StripRealmAdminConferringAsync( List groups, CancellationToken ct) { var roleIds = groups.SelectMany(g => g.RoleIds).Distinct().ToList(); - if (roleIds.Count == 0) return groups; + if (roleIds.Count == 0) return (groups, 0); var realmAdminRoleIds = (await session.Query() .Where(r => roleIds.Contains(r.Id) && r.IsRealmAdmin) .ToListAsync(ct)) .Select(r => r.Id) .ToHashSet(); - if (realmAdminRoleIds.Count == 0) return groups; + if (realmAdminRoleIds.Count == 0) return (groups, 0); var safe = groups.Where(g => !g.RoleIds.Any(realmAdminRoleIds.Contains)).ToList(); var dropped = groups.Count - safe.Count; if (dropped > 0) logger.LogWarning( - "Auth: dropped {Count} externally-derived group(s) conferring realm:admin (config guard should have prevented this)", + "dropped {Count} externally-derived group(s) conferring realm:admin (config guard should have prevented this)", dropped); - return safe; + return (safe, dropped); } } diff --git a/src/dotnet/Modgud.Domain/RealmSettings/RealmSettings.cs b/src/dotnet/Modgud.Domain/RealmSettings/RealmSettings.cs index 60c3d87c..43a6ae02 100644 --- a/src/dotnet/Modgud.Domain/RealmSettings/RealmSettings.cs +++ b/src/dotnet/Modgud.Domain/RealmSettings/RealmSettings.cs @@ -53,6 +53,14 @@ public class RealmSettings /// it as . public DeletionSettings? Deletion { get; set; } + /// Per-realm tenant-audit visibility window (audit redesign §A.6). + /// Null = never configured; callers read it as + /// . A *visibility* window over the + /// rebuildable AuthAuditView — it bounds what the read surface shows, it + /// does NOT delete history (the source events live with the aggregate, masked on + /// erase). + public AuditSettings? Audit { get; set; } + /// Page-builder schemas keyed by SPA-page-slug /// (login, logout, password-forgot, …). Each /// value is the serialised PageNode tree as JSON. 
Missing key diff --git a/src/dotnet/Modgud.Domain/Realms/AuditSettings.cs b/src/dotnet/Modgud.Domain/Realms/AuditSettings.cs new file mode 100644 index 00000000..9e2ba3e1 --- /dev/null +++ b/src/dotnet/Modgud.Domain/Realms/AuditSettings.cs @@ -0,0 +1,25 @@ +namespace Modgud.Domain.Realms; + +/// +/// Per-realm tenant-audit policy, owned by the realm-admin. A nullable JSONB +/// sub-record on the tenant-DB RealmSettings aggregate (adding fields needs +/// no migration). Null on the parent = never configured; callers read it as +/// . +/// +/// Visibility window, NOT retention/deletion. The audit trail is a +/// rebuildable projection (AuthAuditView) over event streams we keep for the +/// aggregate's lifetime (masked on erase). This window only bounds what the read +/// surface *shows* — it does not delete history. Named VisibilityWindowDays +/// (not "RetentionDays") on purpose, so a realm-admin reading the setting can't +/// mistake it for a deletion guarantee — see the design doc §A.6. +/// +public record AuditSettings +{ + /// How many days back the tenant audit read surface shows. Older rows + /// are hidden from the view (not deleted). Must be at least 1. + public int VisibilityWindowDays { get; init; } = 90; + + /// Shared defaults used when a realm has never configured the audit + /// window. Matches the property initializer above. + public static AuditSettings Defaults { get; } = new(); +} diff --git a/src/dotnet/Modgud.Infrastructure/Audit/AuditCategories.cs b/src/dotnet/Modgud.Infrastructure/Audit/AuditCategories.cs new file mode 100644 index 00000000..eee8ef55 --- /dev/null +++ b/src/dotnet/Modgud.Infrastructure/Audit/AuditCategories.cs @@ -0,0 +1,34 @@ +namespace Modgud.Infrastructure.Audit; + +/// +/// Top-level audit categories. Drive the SPA filter chips and group the +/// vocabulary. Stable string codes (not display +/// text) — localise in the frontend, never compare against display strings. 
+/// +/// Lives in Modgud.Infrastructure (not Authentication, where +/// the Phase-0/2 stream-backed view lives) because the Phase-3 streamless +/// security/ops store has emit call sites in lower layers — notably +/// RealmProvisioningService in Infrastructure — that must reference these +/// codes without a magic string. Infrastructure is the lowest layer every call +/// site (Infrastructure / Authentication / Api) can reach. +/// +public static class AuditCategories +{ + // ── Stream-backed (Track A — the GDPR-audit projection, AuthAuditView) ── + public const string Authentication = "authentication"; + public const string Account = "account"; + public const string Federation = "federation"; + public const string AdminRealm = "admin-realm"; + public const string DcrOAuth = "dcr-oauth"; + + // ── Streamless (Track A — the security/ops store, SecurityAuditEntry) ── + /// Tenant-relevant security threats with no aggregate stream: + /// unknown-actor login attempts, probes, rate-limit hits, policy rejections, + /// and the audit-of-the-audit records. + public const string SecurityOps = "security-ops"; + + /// Operational actions (key/cert rotation, recovery-CLI, realm + /// provisioning, sweeps). Some are tenant-visible, the cross-realm infra ones + /// are control-plane-only — see . + public const string Operations = "operations"; +} diff --git a/src/dotnet/Modgud.Infrastructure/Audit/AuditEvents.cs b/src/dotnet/Modgud.Infrastructure/Audit/AuditEvents.cs new file mode 100644 index 00000000..4727d73f --- /dev/null +++ b/src/dotnet/Modgud.Infrastructure/Audit/AuditEvents.cs @@ -0,0 +1,215 @@ +namespace Modgud.Infrastructure.Audit; + +/// +/// Canonical, stable event-type codes for the tenant audit trail. The successor +/// to the "Auth:"-message-prefix vocabulary: both the projection that writes +/// AuthAuditView rows and the streamless security/ops store reference these +/// constants, so a rename can't silently desync a writer from a reader. 
+/// +/// Two families, one vocabulary. The auth.* / account.* / +/// federation.* / admin.* codes name occurrences on the user- and config- +/// aggregate streams (projected into the per-realm GDPR-audit view). The +/// security.* / ops.* / audit.* codes name streamless occurrences +/// (no aggregate to attach to) routed to the cross-realm +/// SecurityAuditEntry store under a legitimate-interest basis with short +/// retention. The boundary is about whether a stream exists, not whether the data +/// is personal — see dev-docs/future-features/logging-audit-redesign.md §. +/// +/// PII discipline: these name occurrences, not payloads. The +/// stream-backed rows store only metadata (who/when/what-kind/realm) and inherit +/// per-subject GDPR masking from the source events. The streamless rows may carry +/// an attempted identifier / IP (personal data under CJEU Breyer) — the +/// short retention window is the proportionality control, not per-subject erase. +/// +public static class AuditEvents +{ + // ───────────────────────────────────────────────────────────────────── + // Stream-backed (Track A — projected into AuthAuditView, GDPR-erasable) + // ───────────────────────────────────────────────────────────────────── + + // ── Authentication (user-stream) ───────────────────────────────── + /// A successful login. Marker only — IP/device live in the + /// Sessions feature, not on the event. Fields: UserId. + public const string LoginSucceeded = "auth.login_succeeded"; + + /// A failed login against a KNOWN user (the streamless store + /// holds unknown-actor attempts). Fields: UserId, Ip. + public const string LoginFailed = "auth.login_failed"; + + /// An aggregated known-user failure streak (Decision (b)) — one row + /// per resolved streak, carrying the count, not one per attempt. Fields: + /// UserId, Count. + public const string LoginFailuresObserved = "auth.login_failures_observed"; + + /// Account crossed the lockout threshold. Fields: UserId. 
+ public const string AccountLockedOut = "auth.locked_out"; + + /// Lockout cleared/expired. Fields: UserId. + public const string AccountUnlocked = "auth.unlocked"; + + // ── Account lifecycle (user-stream) ────────────────────────────── + public const string AccountCreated = "account.created"; + public const string AccountDeleted = "account.deleted"; + public const string AccountProfileUpdated = "account.profile_updated"; + public const string AccountUserNameChanged = "account.username_changed"; + public const string AccountPasswordChanged = "account.password_changed"; + public const string AccountActivated = "account.activated"; + public const string AccountDeactivated = "account.deactivated"; + + // ── Federation (user-stream mirror events) ─────────────────────── + public const string IdentityLinked = "federation.identity_linked"; + public const string IdentityUnlinked = "federation.identity_unlinked"; + + // ── Admin / realm config (config-aggregate streams) ────────────── + public const string LoginProviderAdded = "admin.login_provider_added"; + public const string LoginProviderUpdated = "admin.login_provider_updated"; + public const string LoginProviderEnabled = "admin.login_provider_enabled"; + public const string LoginProviderDisabled = "admin.login_provider_disabled"; + public const string LoginProviderSecretRotated = "admin.login_provider_secret_rotated"; + public const string LoginProviderDeleted = "admin.login_provider_deleted"; + + // ───────────────────────────────────────────────────────────────────── + // Streamless (Track A — SecurityAuditEntry, legitimate interest + retention) + // ───────────────────────────────────────────────────────────────────── + + // ── Security: streamless threats (tenant-visible) ──────────────── + /// Login attempt against a username/email matching no active user + /// (password or magic-link). Actor = attempted identifier; carries Ip. 
+ public const string LoginFailedUnknownUser = "security.login_failed_unknown_user"; + + /// Magic-link login with an invalid/expired token (anonymous probe). + /// Carries Ip. + public const string MagicLinkInvalid = "security.magic_link_invalid"; + + /// An external/federation login was rejected before any user link — + /// domain allowlist, JIT disabled, inactive user, malformed token, or a + /// misconfigured provider. Reason disambiguates. Covers the SAML + /// protocol gates (no metadata, no SSO endpoint, context-build / response-read + /// failure, non-success status) as well as the OIDC/processor rejections. + public const string ExternalLoginRejected = "security.external_login_rejected"; + + /// A SAML response failed the admin-required signature check + /// (response/assertion unsigned). A distinct tamper / signature-wrapping + /// attack signal — not a config/transport problem — so it gets its own code. + /// Reason carries the failing tag (response-unsigned / assertion-unsigned / …). + public const string SamlSignatureRejected = "security.saml_signature_rejected"; + + /// A link attempt was rejected because the external subject is already + /// linked to a DIFFERENT user (attempted account takeover). + public const string IdentityHijackBlocked = "security.identity_hijack_blocked"; + + /// JIT / user-update-script create blocked — the email is already taken + /// by another user (prevents takeover via auto-provisioning). + public const string JitEmailConflict = "security.jit_email_conflict"; + + /// Externally-derived group(s) conferring realm:admin were + /// dropped at login (federation privilege-escalation guard). + public const string PrivilegeEscalationBlocked = "security.privilege_escalation_blocked"; + + /// A rate limit was triggered (DCR or login surface). Actor = Ip. + public const string RateLimitTriggered = "security.rate_limit_triggered"; + + /// A DCR client registration was rejected (policy / validation). 
+ public const string DcrRegistrationRejected = "security.dcr_registration_rejected"; + + /// A bootstrap-admin invite consume was rejected (wrong/expired code). + /// Carries Ip. NB: the invite code itself is never stored. + public const string BootstrapInviteRejected = "security.bootstrap_invite_rejected"; + + // ── Audit-of-the-audit (tenant-visible) ────────────────────────── + /// The audit/security log was cleared by an operator. Records WHO + + /// when + realm — a forensic record of the destructive action itself. + public const string AuditLogCleared = "audit.log_cleared"; + + /// The audit/security log was exported by an operator. + public const string AuditLogExported = "audit.log_exported"; + + // ── Operations: realm/platform actions ─────────────────────────── + /// A realm signing key was rotated by an admin (tenant-visible). + public const string SigningKeyRotated = "ops.signing_key_rotated"; + + /// The signing-key janitor purged expired retired keys (platform-only). + public const string SigningKeyPurged = "ops.signing_key_purged"; + + /// A realm's SAML SP certificate was rotated or first generated + /// (tenant-visible — a realm-relevant trust change). + public const string SamlCertRotated = "ops.saml_cert_rotated"; + + /// Background SAML metadata refresh tick / IdP signing-cert change + /// (platform-only). + public const string SamlMetadataRefreshed = "ops.saml_metadata_refreshed"; + + /// A recovery-CLI operation was invoked (filesystem-trust, control-plane + /// only). Reason carries the specific operation + parameters. + public const string RecoveryCliInvoked = "ops.recovery_cli_invoked"; + + /// A realm database was provisioned (platform-only). Closes the gap + /// where RealmProvisioningService logged without the "Auth:" + /// prefix and never reached the legacy log at all. + public const string RealmProvisioned = "ops.realm_provisioned"; + + /// An existing database was adopted as a realm (platform-only). 
+ public const string RealmAdopted = "ops.realm_adopted"; + + /// The control-plane role was transferred to another realm + /// (platform-only). + public const string ControlPlaneTransferred = "ops.control_plane_transferred"; + + /// A per-realm account-lifecycle sweep ran (reminders / self-erase / + /// auto-purge counts). Platform-only operational summary. + public const string AccountLifecycleSwept = "ops.account_lifecycle_swept"; + + /// A bootstrap-admin invite was issued (tenant-visible realm-init). + /// Any email is masked at the call site. + public const string BootstrapInviteIssued = "ops.bootstrap_invite_issued"; + + /// A DCR client was registered (tenant-visible). + public const string DcrClientRegistered = "ops.dcr_client_registered"; + + /// A registered DCR client was used for the first time — a clean signal + /// the registration was real, not bot noise (tenant-visible). + public const string DcrClientFirstUsed = "ops.dcr_client_first_used"; + + /// A DCR client was garbage-collected for inactivity (tenant-visible). + public const string DcrClientGarbageCollected = "ops.dcr_client_garbage_collected"; + + // ───────────────────────────────────────────────────────────────────── + // Routing helpers (the taxonomy is the source of truth for category + + // visibility, so a call site passes only the EventType — it cannot mark a + // platform-only event tenant-visible by mistake). + // ───────────────────────────────────────────────────────────────────── + + /// The code an event type belongs to, + /// derived from its prefix. Used by the streamless sink to stamp the row. 
+ public static string CategoryOf(string eventType) => eventType switch + { + _ when eventType.StartsWith("ops.", StringComparison.Ordinal) => AuditCategories.Operations, + _ when eventType.StartsWith("security.", StringComparison.Ordinal) + || eventType.StartsWith("audit.", StringComparison.Ordinal) => AuditCategories.SecurityOps, + _ when eventType.StartsWith("account.", StringComparison.Ordinal) => AuditCategories.Account, + _ when eventType.StartsWith("federation.", StringComparison.Ordinal) => AuditCategories.Federation, + _ when eventType.StartsWith("admin.", StringComparison.Ordinal) => AuditCategories.AdminRealm, + _ => AuditCategories.Authentication, // auth.* + }; + + /// + /// Streamless event types that are control-plane-only — cross-realm infra + /// or platform operations a tenant realm-admin must NOT see. Everything else in + /// the streamless store is tenant-visible (a realm-admin sees their own realm's + /// rows). The read endpoint filters on the resolved flag stored on each row. + /// + private static readonly HashSet PlatformOnlyEvents = + [ + SigningKeyPurged, + SamlMetadataRefreshed, + RecoveryCliInvoked, + RealmProvisioned, + RealmAdopted, + ControlPlaneTransferred, + AccountLifecycleSwept, + ]; + + /// True if the event type is control-plane-only (see + /// ). + public static bool IsPlatformOnly(string eventType) => PlatformOnlyEvents.Contains(eventType); +} diff --git a/src/dotnet/Modgud.Infrastructure/Audit/ISecurityAuditLog.cs b/src/dotnet/Modgud.Infrastructure/Audit/ISecurityAuditLog.cs new file mode 100644 index 00000000..87ae3d87 --- /dev/null +++ b/src/dotnet/Modgud.Infrastructure/Audit/ISecurityAuditLog.cs @@ -0,0 +1,69 @@ +namespace Modgud.Infrastructure.Audit; + +/// +/// One streamless security/ops occurrence to record. 
The caller supplies the +/// AuditEvents code plus whatever context it has; the sink derives +/// the Category + control-plane visibility from the code (the taxonomy is +/// the source of truth) and stamps the realm + timestamp at emit. +/// +/// PII is the caller's responsibility to minimise. Pass an attempted +/// username / masked email / IP as Actor only where it is the +/// security signal; never put secrets, tokens, or invite codes in any field. +/// +public sealed record SecurityAuditRecord +{ + /// An AuditEvents streamless code (security.* / + /// ops.* / audit.*). + public required string EventType { get; init; } + + /// Explicit realm slug, overriding the ambient + /// TenantContext.Current. Set this from realm-iterating background + /// jobs (the signing-key janitor, DCR GC, lifecycle sweep, realm + /// provisioning) which run in the system session but emit per-realm + /// rows — exactly the case the legacy RealmLogEnricher's explicit + /// {Realm} binding handled. Leave null on the request path (the ambient + /// realm is correct there). + public string? Realm { get; init; } + + /// "Info" | "Warning" | "Error" — the legacy level mapping. + public string Level { get; init; } = "Info"; + + /// Who/what the event is about: an attempted username, a masked email, + /// an acting admin's username, or an IP for a purely anonymous actor. A display + /// string (NOT a user-id GUID) so the cross-realm read needs no per-tenant join. + /// Null when there is no meaningful actor. + public string? Actor { get; init; } + + /// Source IP where the event carries one. Personal data under CJEU + /// Breyer — retained only for the short prune window. + public string? Ip { get; init; } + + /// Coarse outcome, e.g. "rejected" | "succeeded" | "rotated". Optional. + public string? Status { get; init; } + + /// Disambiguating detail (e.g. the rejection reason, the recovery-CLI + /// operation). Already PII-minimised by the caller. + public string?
Reason { get; init; } + + /// Human-readable rendering for the admin grid (carried forward from the + /// legacy free-text Message column so the existing view keeps working). + public string Message { get; init; } = ""; +} + +/// +/// Best-effort sink for the streamless security/ops audit store (Track A, Phase 3). +/// Replaces the "Auth:"-message-prefix Serilog sink: call sites emit a typed +/// SecurityAuditRecord instead of stringly-typed log lines. +/// +/// Contract: Record is non-blocking and NEVER throws — a +/// failed enqueue drops the record rather than break the auth flow. The realm is +/// captured from TenantContext.Current at call time (the background writer +/// runs tenant-less). Durability is best-effort by design: this is a short-retention +/// legitimate-interest store, not the per-subject GDPR audit (which is the +/// event-sourced AuthAuditView). +/// +public interface ISecurityAuditLog +{ + /// Enqueue a streamless security/ops record. Non-blocking, never throws. + void Record(SecurityAuditRecord record); +} diff --git a/src/dotnet/Modgud.Infrastructure/Audit/SecurityAuditEntry.cs b/src/dotnet/Modgud.Infrastructure/Audit/SecurityAuditEntry.cs new file mode 100644 index 00000000..2de4976d --- /dev/null +++ b/src/dotnet/Modgud.Infrastructure/Audit/SecurityAuditEntry.cs @@ -0,0 +1,66 @@ +using Marten.Schema; + +namespace Modgud.Infrastructure.Audit; + +/// +/// A flat, typed, NON-event-sourced row in the streamless security/ops store +/// (logging/audit redesign Track A — the half that has no aggregate stream). One +/// document per occurrence; lives cross-realm in the system DB, attributed +/// to a realm via Realm and scoped at read by the caller's realm + +/// PlatformOnly (carrying PR #50's ScopeToCallerRealm forward). +/// +/// This is the successor to the personal-data-bearing-but-streamless portion +/// of the old AuthLogDocument: unknown-actor login attempts, probes, +/// rate-limit hits, and operational actions. Processed under Art.
6(1)(f) +/// (security / fraud detection); short hard retention is the proportionality +/// control (a Quartz prune), NOT per-subject erasure — there is no subject +/// stream to attach these to. See dev-docs/future-features/logging-audit-redesign.md +/// §A.5 + the Legitimate-Interest Assessment. +/// +[DocumentAlias("security_audit_entry")] +public class SecurityAuditEntry +{ + public Guid Id { get; init; } = Guid.NewGuid(); + + public DateTimeOffset Timestamp { get; init; } + + /// Realm slug the event was emitted in (from TenantContext.Current + /// at emit; background / no-tenant work is attributed to system). All rows + /// share the system DB; this column scopes the admin read. + public string? Realm { get; init; } + + /// code (derived from the event type). + public string Category { get; init; } = ""; + + /// code (a streamless security.* / ops.* / + /// audit.* code). + public string EventType { get; init; } = ""; + + /// "Info" | "Warning" | "Error". + public string Level { get; init; } = "Info"; + + /// True for control-plane-only events (cross-realm infra / platform ops). + /// Derived from the event type at emit () + /// and stored so the read can filter on a column: a tenant realm-admin sees only + /// PlatformOnly == false rows for their realm; the control-plane sees all. + public bool PlatformOnly { get; init; } + + /// Who/what the event is about — an attempted username, masked email, + /// acting admin, or IP. A display string, not a user-id GUID. May be personal + /// data; retained only for the prune window. Surfaced as the grid's "user" column. + public string? Actor { get; init; } + + /// Source IP where present. Personal data (CJEU Breyer) — retained + /// only for the prune window. + public string? Ip { get; init; } + + /// Coarse outcome ("rejected" | "succeeded" | "rotated" | …). Optional. + public string? Status { get; init; } + + /// Disambiguating detail (rejection reason, recovery-CLI operation, …). + public string? 
Reason { get; init; } + + /// Human-readable rendering for the admin grid (carry-forward of the + /// legacy Message column). + public string Message { get; init; } = ""; +} diff --git a/src/dotnet/Modgud.Infrastructure/Audit/SecurityAuditLog.cs b/src/dotnet/Modgud.Infrastructure/Audit/SecurityAuditLog.cs new file mode 100644 index 00000000..978b5fe9 --- /dev/null +++ b/src/dotnet/Modgud.Infrastructure/Audit/SecurityAuditLog.cs @@ -0,0 +1,139 @@ +using System.Threading.Channels; +using Marten; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; +using Microsoft.Extensions.Logging; +using Modgud.Infrastructure.Persistence.Tenancy; + +namespace Modgud.Infrastructure.Audit; + +/// +/// In-process implementation of ISecurityAuditLog: a bounded channel +/// that drains to the system DB. +/// +/// Bounded + drop-on-full (the legacy sink was an UNBOUNDED channel — +/// a memory-growth risk under a credential-stuffing storm). When the writer can't +/// keep up, the behaviour we want is to shed load, never to block the auth +/// path or grow without limit. Dropped counts are exposed for the writer to log. +/// +/// The realm is captured HERE, on the calling (request) thread where +/// TenantContext.Current is set — the writer runs tenant-less in a +/// background service, exactly as RealmLogEnricher captured it for the +/// legacy sink. Category + control-plane visibility are derived from the event type +/// so the row can't disagree with the taxonomy. +/// +public sealed class SecurityAuditLog : ISecurityAuditLog +{ + // Generous bound: a real burst is absorbed; a pathological flood sheds rather + // than OOMs. SingleReader because exactly one SecurityAuditWriter drains it.
+ private readonly Channel _channel = + Channel.CreateBounded(new BoundedChannelOptions(50_000) + { + FullMode = BoundedChannelFullMode.DropWrite, + SingleReader = true, + }); + + private long _dropped; + + internal ChannelReader Reader => _channel.Reader; + + /// Total records dropped because the channel was full (read-and-reset + /// by the writer so it can log bursts). + internal long ReadAndResetDropped() => Interlocked.Exchange(ref _dropped, 0); + + public void Record(SecurityAuditRecord record) + { + var entry = new SecurityAuditEntry + { + Timestamp = DateTimeOffset.UtcNow, + // Explicit override wins (realm-iterating background jobs), else the + // ambient realm — mirrors the legacy RealmLogEnricher dual-sourcing. + Realm = record.Realm ?? TenantContext.Current, + EventType = record.EventType, + Category = AuditEvents.CategoryOf(record.EventType), + PlatformOnly = AuditEvents.IsPlatformOnly(record.EventType), + Level = record.Level, + Actor = record.Actor, + Ip = record.Ip, + Status = record.Status, + Reason = record.Reason, + Message = record.Message, + }; + + if (!_channel.Writer.TryWrite(entry)) + Interlocked.Increment(ref _dropped); + } + + /// + /// Synchronously drain everything currently queued to the system DB. For + /// SHORT-LIVED process paths that never start the host (so + /// never runs) — notably the recovery CLI and + /// STARTUP_COMMAND. Without this, records those paths enqueue would be lost on + /// exit, which is exactly the high-value break-glass forensic trail we must keep. + /// Safe to call when the channel is empty (no-op). NOT used on the normal web + /// path, where the background writer owns the drain. 
+ /// + public async Task FlushAsync(IDocumentStore store, CancellationToken ct = default) + { + var batch = new List(); + while (_channel.Reader.TryRead(out var entry)) + batch.Add(entry); + + if (batch.Count == 0) + return; + + await using var session = store.LightweightSession(TenantConstants.SystemTenantId); + session.Store(batch.ToArray()); + await session.SaveChangesAsync(ct); + } +} + +/// +/// Background service that drains into the system DB +/// in batches. Replaces the legacy AuthLogPersistenceService drain loop; the +/// retention prune that lived there is now a separate Quartz job over this store. +/// +public sealed class SecurityAuditWriter( + IServiceProvider services, + SecurityAuditLog log, + ILogger logger) : BackgroundService +{ + private const int MaxBatch = 256; + + protected override async Task ExecuteAsync(CancellationToken stoppingToken) + { + var reader = log.Reader; + while (await reader.WaitToReadAsync(stoppingToken)) + { + var batch = new List(MaxBatch); + while (batch.Count < MaxBatch && reader.TryRead(out var entry)) + batch.Add(entry); + + if (batch.Count == 0) + continue; + + try + { + using var scope = services.CreateScope(); + // Runs out-of-band in a HostedService — no HttpContext to drive + // tenant resolution, so target the system tenant explicitly. The + // streamless store lives cross-realm in the system DB by design; + // each row already carries its own Realm captured at emit time. 
+ await using var session = scope.ServiceProvider + .GetRequiredService() + .LightweightSession(TenantConstants.SystemTenantId); + + session.Store(batch.ToArray()); + await session.SaveChangesAsync(stoppingToken); + } + catch (Exception ex) when (ex is not OperationCanceledException) + { + logger.LogError(ex, "Failed to persist {Count} security audit entries", batch.Count); + } + + var dropped = log.ReadAndResetDropped(); + if (dropped > 0) + logger.LogWarning("Security audit store shed {Dropped} record(s) — channel full", dropped); + } + } +} diff --git a/src/dotnet/Modgud.Infrastructure/Authorization/AppRealmSeeder.cs b/src/dotnet/Modgud.Infrastructure/Authorization/AppRealmSeeder.cs index 126c6f99..21cdd326 100644 --- a/src/dotnet/Modgud.Infrastructure/Authorization/AppRealmSeeder.cs +++ b/src/dotnet/Modgud.Infrastructure/Authorization/AppRealmSeeder.cs @@ -41,9 +41,11 @@ private static readonly (string Resource, string[] Actions)[] ModgudCatalog = ("authorization-group", ["read", "write"]), ("permission-role", ["read", "write"]), - // Sessions + audit + // Sessions + audit. auth-log:read = the streamless security/ops store; + // audit-log:read = the per-realm GDPR-audit (event-sourced) — two surfaces. ("session", ["read", "write"]), ("auth-log", ["read"]), + ("audit-log", ["read"]), // GDPR (permanent-erase only — self-service is implicit on the caller) ("gdpr", ["admin"]), diff --git a/src/dotnet/Modgud.Infrastructure/DependencyInjection.cs b/src/dotnet/Modgud.Infrastructure/DependencyInjection.cs index 964e3934..8ed7a8d9 100644 --- a/src/dotnet/Modgud.Infrastructure/DependencyInjection.cs +++ b/src/dotnet/Modgud.Infrastructure/DependencyInjection.cs @@ -169,9 +169,15 @@ public static IServiceCollection AddInfrastructure( opt.RegisterResource(app, "authorization-group", "read", "write"); opt.RegisterResource(app, "permission-role", "read", "write"); - // Sessions + audit + // Sessions + audit. 
Two distinct read surfaces (logging/audit redesign): + // auth-log:read — the streamless security/ops store (failed logins on + // unknown actors, probes, rate-limits, operational + // actions). Cross-realm in the system DB. + // audit-log:read — the per-realm GDPR-audit (event-sourced account / + // login history projected from the user streams). opt.RegisterResource(app, "session", "read", "write"); opt.RegisterResource(app, "auth-log", "read"); + opt.RegisterResource(app, "audit-log", "read"); // In-app live observability view (Phase 5 of the OpenTelemetry // followup). Read-only; the granular Prometheus scrape is gated diff --git a/src/dotnet/Modgud.Infrastructure/Observability/RealmErrorBuffer.cs b/src/dotnet/Modgud.Infrastructure/Observability/RealmErrorBuffer.cs new file mode 100644 index 00000000..28ee6976 --- /dev/null +++ b/src/dotnet/Modgud.Infrastructure/Observability/RealmErrorBuffer.cs @@ -0,0 +1,127 @@ +using System.Collections.Concurrent; + +namespace Modgud.Infrastructure.Observability; + +/// +/// In-memory live error feed for the in-app observability view +/// (logging/audit redesign Phase 5, §B.3). Captures recent operational +/// error records so a realm-admin can live-tail "what is erroring on my +/// realm" without standing up OpenObserve. +/// +/// Per-realm-bounded buffers — deliberately NOT a single global +/// ring. The sibling is one +/// global ring with query-time realm filtering, where a loud realm provably +/// evicts a quiet realm's events before its admin sees them. This buffer keeps +/// an independently-capped ring per realm (keyed by realm slug), so a +/// noisy realm can only evict its own oldest entries — a quiet realm's +/// error visibility can never be starved. Each realm ring evicts its own +/// oldest; there is no retention job (§B.3). +/// +/// Memory is bounded by realms × capacityPerRealm; realm count is +/// bounded by the tenant count and each ring is fixed-size, so the total stays +/// small. 
Rings are created lazily on first record for a realm. +/// +/// This feed does NOT pass through the OTel collector redaction. +/// Like the streamless security store, the call-site PII belt +/// (LogPiiMasking + the Phase-4 source-belt that logs user.Id +/// rather than usernames) plus per-realm read scoping are the only PII +/// controls here. Entries are rendered+truncated at capture by +/// ErrorFeedSink. +/// +/// Multi-instance note: this buffer is local to the process. Phase 5 +/// deliberately doesn't replicate across instances — see the HA / +/// Redis-backplane trade-off. Single-instance is the supported shape today. +/// +public sealed class RealmErrorBuffer +{ + public const int DefaultCapacityPerRealm = 100; + + private readonly ConcurrentDictionary _rings = + new(StringComparer.Ordinal); + private readonly int _capacityPerRealm; + + public RealmErrorBuffer(int capacityPerRealm = DefaultCapacityPerRealm) + { + _capacityPerRealm = capacityPerRealm < 1 ? DefaultCapacityPerRealm : capacityPerRealm; + } + + /// + /// Fired after every so live observers (the SignalARR + /// ObservabilityHub.LogsSubscribe) can push to subscribed clients + /// without polling. Handlers are invoked synchronously on the recording + /// (log-emit) thread — keep them cheap. Handler exceptions are swallowed so + /// a buggy subscriber can't break logging for everyone else. + /// + public event Action? EntryRecorded; + + public void Record(ErrorLogEntry entry) + { + var ring = _rings.GetOrAdd(entry.Realm, _ => new RealmRing(_capacityPerRealm)); + ring.Add(entry); + + var handler = EntryRecorded; + if (handler is null) return; + foreach (var single in handler.GetInvocationList()) + { + try { ((Action)single)(entry); } + catch { /* swallow — never let a subscriber break logging */ } + } + } + + /// Most-recent first, for the given realm only. Unknown realm → empty. + public IReadOnlyList GetRecent(string realm, int limit) + => _rings.TryGetValue(realm, out var ring) + ? 
ring.Snapshot(limit) + : Array.Empty(); + + /// A single realm's fixed-size FIFO ring. Independently capped. + private sealed class RealmRing + { + private readonly object _gate = new(); + private readonly Queue _items; + private readonly int _capacity; + + public RealmRing(int capacity) + { + _capacity = capacity; + _items = new Queue(capacity); + } + + public void Add(ErrorLogEntry entry) + { + lock (_gate) + { + _items.Enqueue(entry); + while (_items.Count > _capacity) _items.Dequeue(); + } + } + + public IReadOnlyList Snapshot(int limit) + { + lock (_gate) + { + var arr = _items.ToArray(); // oldest..newest + var take = Math.Min(limit, arr.Length); + var result = new ErrorLogEntry[take]; + // Reverse-fill: newest entries sit at the END of the queue. + for (var i = 0; i < take; i++) + result[i] = arr[arr.Length - 1 - i]; + return result; + } + } + } +} + +/// +/// One captured operational error, already rendered and truncated at the sink. +/// No raw LogEvent / exception object is retained — only display-safe +/// strings — so the buffer holds no live references and a bounded footprint. +/// +public record ErrorLogEntry( + DateTimeOffset Timestamp, + string Realm, + string Level, + string Message, + string? Exception, + string? SourceContext, + string? 
TraceId); diff --git a/src/dotnet/Modgud.Infrastructure/OpenIddict/DcrLastUsedTrackerHandler.cs b/src/dotnet/Modgud.Infrastructure/OpenIddict/DcrLastUsedTrackerHandler.cs index 4bf127e2..7a10a0f5 100644 --- a/src/dotnet/Modgud.Infrastructure/OpenIddict/DcrLastUsedTrackerHandler.cs +++ b/src/dotnet/Modgud.Infrastructure/OpenIddict/DcrLastUsedTrackerHandler.cs @@ -1,8 +1,7 @@ using System.Text.Json; -using Modgud.Application.Dcr; using Modgud.Domain.OAuth.Applications; +using Modgud.Infrastructure.Audit; using Marten; -using Microsoft.Extensions.Logging; using OpenIddict.Server; namespace Modgud.Infrastructure.OpenIddict; @@ -36,12 +35,12 @@ public sealed class DcrLastUsedTrackerHandler .Build(); private readonly IDocumentSession _session; - private readonly ILogger _logger; + private readonly ISecurityAuditLog _securityAudit; - public DcrLastUsedTrackerHandler(IDocumentSession session, ILogger logger) + public DcrLastUsedTrackerHandler(IDocumentSession session, ISecurityAuditLog securityAudit) { _session = session; - _logger = logger; + _securityAudit = securityAudit; } public async ValueTask HandleAsync(OpenIddictServerEvents.ProcessSignInContext context) @@ -79,10 +78,15 @@ public async ValueTask HandleAsync(OpenIddictServerEvents.ProcessSignInContext c if (isFirstUse) { - _logger.LogInformation( - "Auth: " + DcrAuditEvents.ClientFirstUsed + - " ClientId={ClientId} RegisteredAt={RegisteredAt}", - clientId, registeredAt ?? "(unknown)"); + _securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.DcrClientFirstUsed, + Level = "Info", + Actor = clientId, + Status = "first_used", + Reason = $"registeredAt {registeredAt ?? 
"(unknown)"}", + Message = $"DCR client {clientId} used for the first time", + }); } } diff --git a/src/dotnet/Modgud.Infrastructure/OpenIddict/RealmSigningKeyHandler.cs b/src/dotnet/Modgud.Infrastructure/OpenIddict/RealmSigningKeyHandler.cs index a2a05b45..103ff8a7 100644 --- a/src/dotnet/Modgud.Infrastructure/OpenIddict/RealmSigningKeyHandler.cs +++ b/src/dotnet/Modgud.Infrastructure/OpenIddict/RealmSigningKeyHandler.cs @@ -103,7 +103,7 @@ public async ValueTask HandleAsync(GenerateTokenContext context) } } - _logger.LogDebug("Auth: signed {TokenType} for realm '{Slug}' with kid '{Kid}', issuer '{Issuer}'", + _logger.LogDebug("signed {TokenType} for realm '{Slug}' with kid '{Kid}', issuer '{Issuer}'", context.TokenType, slug, creds.Key.KeyId, context.SecurityTokenDescriptor?.Issuer); } diff --git a/src/dotnet/Modgud.Infrastructure/Realms/RealmProvisioningService.cs b/src/dotnet/Modgud.Infrastructure/Realms/RealmProvisioningService.cs index aa388e58..8e544edc 100644 --- a/src/dotnet/Modgud.Infrastructure/Realms/RealmProvisioningService.cs +++ b/src/dotnet/Modgud.Infrastructure/Realms/RealmProvisioningService.cs @@ -1,6 +1,7 @@ using Modgud.Application.DTOs.Realms; using Modgud.Application.Services; using Modgud.Domain.Realms; +using Modgud.Infrastructure.Audit; using Modgud.Infrastructure.Authorization; using Modgud.Infrastructure.OAuth; using Modgud.Infrastructure.Persistence.Tenancy; @@ -72,6 +73,7 @@ public sealed class RealmProvisioningService : IRealmProvisioningService private readonly IMasterConnectionString _masterCs; private readonly IRealmCache _realmCache; private readonly IServiceProvider _serviceProvider; + private readonly ISecurityAuditLog _securityAudit; private readonly ILogger _logger; public RealmProvisioningService( @@ -80,6 +82,7 @@ public RealmProvisioningService( IMasterConnectionString masterCs, IRealmCache realmCache, IServiceProvider serviceProvider, + ISecurityAuditLog securityAudit, ILogger logger) { _globalStore = globalStore; @@ -87,6 
+90,7 @@ public RealmProvisioningService( _masterCs = masterCs; _realmCache = realmCache; _serviceProvider = serviceProvider; + _securityAudit = securityAudit; _logger = logger; } @@ -181,6 +185,15 @@ public async Task> CreateRealmAsync(CreateRealmDto dto, Cancellat #pragma warning restore CA2100 await createDbCmd.ExecuteNonQueryAsync(ct); _logger.LogInformation("Created database {DbName} for realm {Slug}", tenantDbName, dto.Slug); + _securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.RealmProvisioned, + Level = "Info", + Realm = dto.Slug, + Status = "provisioned", + Reason = $"database {tenantDbName}", + Message = $"Created database {tenantDbName} for realm {dto.Slug}", + }); } } @@ -429,6 +442,15 @@ await AppRealmSeeder.SeedAsync( _logger.LogWarning( "Control plane transferred to realm {Slug} (cleared {Count} previous holder(s))", targetSlug, otherHolders.Count); + _securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.ControlPlaneTransferred, + Level = "Warning", + Realm = targetSlug, + Status = "transferred", + Reason = $"to realm {targetSlug}, {otherHolders.Count} previous holder(s)", + Message = $"Control plane transferred to realm {targetSlug} (cleared {otherHolders.Count} previous holder(s))", + }); return target; } @@ -511,6 +533,15 @@ await seederScope.ServiceProvider _realmCache.Invalidate(); _logger.LogInformation("Adopted existing database {DbName} as realm {Slug}", tenantDbName, slug); + _securityAudit.Record(new SecurityAuditRecord + { + EventType = AuditEvents.RealmAdopted, + Level = "Info", + Realm = slug, + Status = "adopted", + Reason = $"database {tenantDbName}", + Message = $"Adopted existing database {tenantDbName} as realm {slug}", + }); return realm; } } diff --git a/src/dotnet/Modgud.Tests.Unit/AuthLog/AuthLogAttributionTests.cs b/src/dotnet/Modgud.Tests.Unit/AuthLog/AuthLogAttributionTests.cs index eb0e46ec..863a2999 100644 --- a/src/dotnet/Modgud.Tests.Unit/AuthLog/AuthLogAttributionTests.cs 
+++ b/src/dotnet/Modgud.Tests.Unit/AuthLog/AuthLogAttributionTests.cs @@ -1,5 +1,6 @@ using Modgud.Authentication.Api.Admin; using Modgud.Authentication.AuthLog; +using Modgud.Infrastructure.Audit; using Modgud.Infrastructure.Persistence.Tenancy; using Serilog.Core; using Serilog.Events; @@ -8,11 +9,12 @@ namespace Modgud.Tests.Unit.AuthLog; /// -/// The realm attribution that scopes the admin auth-log read: the enricher -/// captures the ambient realm at log time, and the sink reads it off the event -/// onto the persisted document. These are the two deterministic seams of the -/// AuthLog tenant-visibility fix (the realm filtering in the read endpoint is a -/// trivial Where over this column). +/// Two deterministic seams of the security/audit logging: +/// (1) the realm enricher stamps the ambient realm on Serilog events at +/// emit time (kept after the "Auth:" sink was retired — it tags operational logs + +/// the Phase-4 OTel export); and (2) the realm + tenant-visibility scoping the admin +/// Security-log read applies (AuthLogEndpoints.ScopeToCallerRealm over +/// the streamless store).
/// public class AuthLogAttributionTests { @@ -54,67 +56,38 @@ public void Enricher_NoAmbientTenant_FallsBackToSystem() Assert.Equal("system", ((ScalarValue)v).Value); } - // ── Sink ──────────────────────────────────────────────────────────── + // ── Read scoping (AuthLogEndpoints.ScopeToCallerRealm over the streamless store) ── - [Fact] - public void Sink_CapturesRealmProperty_OntoDocument() + private static IQueryable Rows() => new[] { - var sink = new AuthLogSink(); - - sink.Emit(AuthEvent("Auth: login successful {UserName}", - new LogEventProperty("UserName", new ScalarValue("bob")), - new LogEventProperty("Realm", new ScalarValue("acme")))); - - Assert.True(sink.Reader.TryRead(out var doc)); - Assert.Equal("acme", doc!.Realm); - Assert.Equal("bob", doc.UserName); - } - - [Fact] - public void Sink_NoRealmProperty_LeavesRealmNull() - { - var sink = new AuthLogSink(); - - sink.Emit(AuthEvent("Auth: background event")); - - Assert.True(sink.Reader.TryRead(out var doc)); - Assert.Null(doc!.Realm); - } + new SecurityAuditEntry { Message = "a", Realm = "system", PlatformOnly = false }, + new SecurityAuditEntry { Message = "b", Realm = "acme", PlatformOnly = false }, + new SecurityAuditEntry { Message = "c", Realm = "globex", PlatformOnly = false }, + new SecurityAuditEntry { Message = "p", Realm = "acme", PlatformOnly = true }, + }.AsQueryable(); [Fact] - public void Sink_IgnoresNonAuthEvents() + public void Scope_ControlPlane_SeesEveryRealm_IncludingPlatformOnly() { - var sink = new AuthLogSink(); - - sink.Emit(AuthEvent("Some unrelated log {Realm}", - new LogEventProperty("Realm", new ScalarValue("acme")))); - - Assert.False(sink.Reader.TryRead(out _)); // only "Auth:"-prefixed events are persisted + var result = AuthLogEndpoints.ScopeToCallerRealm(Rows(), "system", callerIsControlPlane: true).ToList(); + Assert.Equal(4, result.Count); // the control-plane realm sees the full cross-realm log, platform-only included } - // ── Read scoping 
(AuthLogEndpoints.ScopeToCallerRealm) ────────────── - - private static IQueryable Rows() => new[] - { - new AuthLogDocument { Message = "a", Realm = "system" }, - new AuthLogDocument { Message = "b", Realm = "acme" }, - new AuthLogDocument { Message = "c", Realm = "globex" }, - new AuthLogDocument { Message = "d", Realm = null }, - }.AsQueryable(); - [Fact] - public void Scope_ControlPlane_SeesEveryRealm() + public void Scope_TenantRealm_SeesOnlyOwnRealm_TenantVisibleOnly() { - var result = AuthLogEndpoints.ScopeToCallerRealm(Rows(), "system", callerIsControlPlane: true).ToList(); - Assert.Equal(4, result.Count); // the control-plane realm sees the full cross-realm log + var result = AuthLogEndpoints.ScopeToCallerRealm(Rows(), "acme", callerIsControlPlane: false).ToList(); + Assert.Single(result); + Assert.Equal("b", result[0].Message); // own realm, tenant-visible — NOT the platform-only "p" row } [Fact] - public void Scope_TenantRealm_SeesOnlyOwnRealm() + public void Scope_TenantRealm_NeverSeesPlatformOnly() { + // A control-plane-only operational row in the caller's OWN realm must still + // be hidden from a tenant realm-admin. 
var result = AuthLogEndpoints.ScopeToCallerRealm(Rows(), "acme", callerIsControlPlane: false).ToList(); - Assert.Single(result); - Assert.Equal("acme", result[0].Realm); // a tenant realm-admin never sees other realms' events + Assert.DoesNotContain(result, r => r.PlatformOnly); } [Fact] diff --git a/src/dotnet/Modgud.Tests.Unit/ExternalAuth/DynamicSamlSchemeManagerTests.cs b/src/dotnet/Modgud.Tests.Unit/ExternalAuth/DynamicSamlSchemeManagerTests.cs index cc6ff592..e6af3383 100644 --- a/src/dotnet/Modgud.Tests.Unit/ExternalAuth/DynamicSamlSchemeManagerTests.cs +++ b/src/dotnet/Modgud.Tests.Unit/ExternalAuth/DynamicSamlSchemeManagerTests.cs @@ -4,6 +4,7 @@ using Modgud.Authentication.Domain.LoginProviders; using Modgud.Authentication.Identity.LoginProviders.Saml; using Modgud.Authentication.Identity.LoginProviders.Saml.Flavors; +using Modgud.Infrastructure.Audit; using Modgud.Infrastructure.Persistence.Tenancy; namespace Modgud.Tests.Unit.ExternalAuth; @@ -28,8 +29,16 @@ private static DynamicSamlSchemeManager NewManager() => }), new SamlMetadataFetcher(new NoNetworkHttpClientFactory(), NullLogger.Instance), TimeProvider.System, + new NoOpSecurityAuditLog(), NullLogger.Instance); + /// No-op audit sink — these are pure-construction tests; the manager's + /// metadata-refresh audit record is exercised by the integration suite. + private sealed class NoOpSecurityAuditLog : ISecurityAuditLog + { + public void Record(SecurityAuditRecord record) { } + } + /// /// Test double — returns an HttpClient that fails every request. 
Sufficient /// for the manager tests because none of them set FlavorData.MetadataUrl, diff --git a/src/dotnet/Modgud.Tests.Unit/Observability/ErrorFeedSinkTests.cs b/src/dotnet/Modgud.Tests.Unit/Observability/ErrorFeedSinkTests.cs new file mode 100644 index 00000000..cdbef2c6 --- /dev/null +++ b/src/dotnet/Modgud.Tests.Unit/Observability/ErrorFeedSinkTests.cs @@ -0,0 +1,139 @@ +using Modgud.Authentication.AuthLog; +using Modgud.Infrastructure.Observability; +using Serilog.Events; +using Serilog.Parsing; + +namespace Modgud.Tests.Unit.Observability; + +/// +/// The Phase-5 (§B.3) ErrorFeedSink capture filter. Default scope (Open Decision #7) is +/// Error+ from Modgud.* loggers only; the realm tag is read from the +/// enricher-stamped Realm property. Both the level floor and +/// the source prefix are constructor params so the operator-configurable +/// behaviour is exercised here. +/// +public class ErrorFeedSinkTests +{ + private static readonly MessageTemplateParser Parser = new(); + + private static LogEvent Event( + LogEventLevel level, + string? sourceContext, + string?
exception = null, + params LogEventProperty[] extra) + { + var props = new List(); + if (sourceContext is not null) props.Add(new LogEventProperty("SourceContext", new ScalarValue(sourceContext))); + if (realm is not null) props.Add(new LogEventProperty("Realm", new ScalarValue(realm))); + props.AddRange(extra); + return new LogEvent(DateTimeOffset.UtcNow, level, exception, Parser.Parse(template), props); + } + + private static (ErrorFeedSink sink, RealmErrorBuffer buffer) NewSink( + LogEventLevel min = LogEventLevel.Error, string prefix = "Modgud") + { + var buffer = new RealmErrorBuffer(); + return (new ErrorFeedSink(buffer, min, prefix), buffer); + } + + [Fact] + public void ErrorFromModgudSource_IsCaptured() + { + var (sink, buffer) = NewSink(); + sink.Emit(Event(LogEventLevel.Error, "Modgud.Authentication.Api.AccountEndpoints", "acme", "kaboom")); + + var rows = buffer.GetRecent("acme", 10); + Assert.Single(rows); + Assert.Equal("Error", rows[0].Level); + Assert.Equal("kaboom", rows[0].Message); + Assert.Equal("Modgud.Authentication.Api.AccountEndpoints", rows[0].SourceContext); + } + + [Fact] + public void FatalFromModgudSource_IsCaptured() + { + var (sink, buffer) = NewSink(); + sink.Emit(Event(LogEventLevel.Fatal, "Modgud.Api.Program", "acme", "down")); + Assert.Single(buffer.GetRecent("acme", 10)); + } + + [Fact] + public void BelowFloor_IsIgnored() + { + var (sink, buffer) = NewSink(); // default floor = Error + sink.Emit(Event(LogEventLevel.Warning, "Modgud.X", "acme", "just a warning")); + sink.Emit(Event(LogEventLevel.Information, "Modgud.X", "acme", "fyi")); + Assert.Empty(buffer.GetRecent("acme", 10)); + } + + [Fact] + public void NonModgudSource_IsIgnored() + { + var (sink, buffer) = NewSink(); + sink.Emit(Event(LogEventLevel.Error, "Microsoft.AspNetCore.Server", "acme", "framework error")); + sink.Emit(Event(LogEventLevel.Error, "Npgsql.Connection", "acme", "db error")); + Assert.Empty(buffer.GetRecent("acme", 10)); + } + + [Fact] + public void 
NoSourceContext_IsIgnored() + { + var (sink, buffer) = NewSink(); + sink.Emit(Event(LogEventLevel.Error, sourceContext: null, "acme", "static log error")); + Assert.Empty(buffer.GetRecent("acme", 10)); + } + + [Fact] + public void NoRealmProperty_FallsBackToSystem() + { + var (sink, buffer) = NewSink(); + sink.Emit(Event(LogEventLevel.Error, "Modgud.X", realm: null, "no realm tagged")); + Assert.Single(buffer.GetRecent("system", 10)); + } + + [Fact] + public void RendersMessageTemplateArguments() + { + var (sink, buffer) = NewSink(); + sink.Emit(Event( + LogEventLevel.Error, "Modgud.X", "acme", "failed for {Count} items", + extra: new LogEventProperty("Count", new ScalarValue(42)))); + + Assert.Equal("failed for 42 items", buffer.GetRecent("acme", 10)[0].Message); + } + + [Fact] + public void CapturesExceptionTypeAndMessage_NotTheGraph() + { + var (sink, buffer) = NewSink(); + sink.Emit(Event(LogEventLevel.Error, "Modgud.X", "acme", "boom", + exception: new InvalidOperationException("the cause"))); + + Assert.Equal("InvalidOperationException: the cause", buffer.GetRecent("acme", 10)[0].Exception); + } + + [Fact] + public void NoException_LeavesExceptionNull() + { + var (sink, buffer) = NewSink(); + sink.Emit(Event(LogEventLevel.Error, "Modgud.X", "acme", "boom")); + Assert.Null(buffer.GetRecent("acme", 10)[0].Exception); + } + + [Fact] + public void WidenedConfig_SinkFilter_AcceptsWarningFromAnySource() + { + // Operator widens the SINK's own filter: Warning floor + empty prefix. + // This exercises the sink in isolation; in production Serilog's pipeline + // floors (global Information + per-namespace Warning overrides) apply + // additionally upstream, so framework sub-Warning events still wouldn't + // reach the sink (see ErrorFeedSettings.MinimumLevel/SourcePrefix docs). + // The Npgsql Warning here clears both the sink filter and (with the + // shipped Npgsql→Warning override) the pipeline. 
+ var (sink, buffer) = NewSink(min: LogEventLevel.Warning, prefix: ""); + sink.Emit(Event(LogEventLevel.Warning, "Npgsql.Connection", "acme", "transient")); + Assert.Single(buffer.GetRecent("acme", 10)); + } +} diff --git a/src/dotnet/Modgud.Tests.Unit/Observability/RealmErrorBufferTests.cs b/src/dotnet/Modgud.Tests.Unit/Observability/RealmErrorBufferTests.cs new file mode 100644 index 00000000..63361592 --- /dev/null +++ b/src/dotnet/Modgud.Tests.Unit/Observability/RealmErrorBufferTests.cs @@ -0,0 +1,103 @@ +using Modgud.Infrastructure.Observability; + +namespace Modgud.Tests.Unit.Observability; + +/// +/// The Phase-5 (§B.3) load-bearing guarantee: the error feed uses an +/// independently-capped ring PER realm, so a noisy realm can never evict a +/// quiet realm's errors (the failure mode of the global +/// ring this deliberately replaces). +/// +public class RealmErrorBufferTests +{ + private static ErrorLogEntry Entry(string realm, string message) => + new(DateTimeOffset.UtcNow, realm, "Error", message, Exception: null, SourceContext: "Modgud.X", TraceId: null); + + [Fact] + public void GetRecent_ReturnsNewestFirst() + { + var buffer = new RealmErrorBuffer(capacityPerRealm: 10); + buffer.Record(Entry("acme", "first")); + buffer.Record(Entry("acme", "second")); + buffer.Record(Entry("acme", "third")); + + var recent = buffer.GetRecent("acme", 10); + + Assert.Equal(new[] { "third", "second", "first" }, recent.Select(e => e.Message)); + } + + [Fact] + public void GetRecent_UnknownRealm_ReturnsEmpty() + { + var buffer = new RealmErrorBuffer(); + buffer.Record(Entry("acme", "x")); + + Assert.Empty(buffer.GetRecent("globex", 10)); + } + + [Fact] + public void GetRecent_RespectsLimit() + { + var buffer = new RealmErrorBuffer(capacityPerRealm: 10); + for (var i = 0; i < 5; i++) buffer.Record(Entry("acme", $"m{i}")); + + Assert.Equal(2, buffer.GetRecent("acme", 2).Count); + } + + [Fact] + public void NoisyRealm_DoesNotEvictQuietRealm() + { + // The whole point of Phase 
5's per-realm rings (§B.3). Cap is small; a + // flood on one realm must leave another realm's single error intact. + var buffer = new RealmErrorBuffer(capacityPerRealm: 3); + + buffer.Record(Entry("quiet", "the one quiet error")); + for (var i = 0; i < 100; i++) buffer.Record(Entry("noisy", $"flood-{i}")); + + var quiet = buffer.GetRecent("quiet", 10); + Assert.Single(quiet); + Assert.Equal("the one quiet error", quiet[0].Message); + + // The noisy realm is independently capped at its own ring size. + Assert.Equal(3, buffer.GetRecent("noisy", 100).Count); + } + + [Fact] + public void PerRealmCap_EvictsOwnOldest() + { + var buffer = new RealmErrorBuffer(capacityPerRealm: 2); + buffer.Record(Entry("acme", "oldest")); + buffer.Record(Entry("acme", "middle")); + buffer.Record(Entry("acme", "newest")); + + var recent = buffer.GetRecent("acme", 10); + Assert.Equal(new[] { "newest", "middle" }, recent.Select(e => e.Message)); + } + + [Fact] + public void EntryRecorded_FiresForEachRecord_WithTheEntry() + { + var buffer = new RealmErrorBuffer(); + var seen = new List(); + buffer.EntryRecorded += seen.Add; + + buffer.Record(Entry("acme", "a")); + buffer.Record(Entry("globex", "b")); + + Assert.Equal(2, seen.Count); + Assert.Equal("a", seen[0].Message); + Assert.Equal("globex", seen[1].Realm); + } + + [Fact] + public void EntryRecorded_BuggySubscriber_DoesNotBreakRecording() + { + var buffer = new RealmErrorBuffer(); + buffer.EntryRecorded += _ => throw new InvalidOperationException("boom"); + + var ex = Record.Exception(() => buffer.Record(Entry("acme", "still recorded"))); + + Assert.Null(ex); + Assert.Single(buffer.GetRecent("acme", 10)); + } +} diff --git a/src/frontend-vue/public/i18n/de.json b/src/frontend-vue/public/i18n/de.json index ffac3cd4..abac5b77 100644 --- a/src/frontend-vue/public/i18n/de.json +++ b/src/frontend-vue/public/i18n/de.json @@ -658,16 +658,39 @@ "auto-group-drift": { "title": "Auto-Gruppen-Drift" } } }, - "authLog": { - "title": "Auth Log", + 
"securityLog": { + "title": "Sicherheit", "time": "Zeit", + "category": "Kategorie", + "event": "Ereignis", + "detail": "Detail", + "actor": "Akteur", + "ip": "IP-Adresse", "level": "Level", + "realm": "Realm", + "allCategories": "Alle", + "refresh": "Aktualisieren", + "clear": "Leeren" + }, + "auditLog": { + "title": "Audit-Log", + "time": "Zeit", + "category": "Kategorie", "event": "Ereignis", "user": "Benutzer", + "method": "Methode", "ip": "IP-Adresse", + "level": "Level", "realm": "Realm", "refresh": "Aktualisieren", - "clear": "Leeren" + "allCategories": "Alle" + }, + "logs": { + "title": "Protokolle", + "tabs": { + "audit": "Audit", + "security": "Sicherheit" + } }, "assets": { "title": "Asset-Library", @@ -768,7 +791,10 @@ "peak": "Peak", "activityFeed": "Letzte Ereignisse", "updated": "Aktualisiert", - "empty": "Keine Ereignisse im aktuellen Fenster." + "empty": "Keine Ereignisse im aktuellen Fenster.", + "errorFeed": "Letzte Fehler", + "errorFeedHint": "Anwendungsfehler in diesem Realm", + "errorsEmpty": "Keine Fehler erfasst." }, "section": { "authorization": "Autorisierung", diff --git a/src/frontend-vue/src/router/index.ts b/src/frontend-vue/src/router/index.ts index caf7724b..14187609 100644 --- a/src/frontend-vue/src/router/index.ts +++ b/src/frontend-vue/src/router/index.ts @@ -274,9 +274,20 @@ const routes = [ ], }, }, + { + // Combined logs home — Audit + Security as tabs. + path: 'logs', + component: () => import('@/views/admin/AdminLogsView.vue'), + }, + // Back-compat: the two surfaces used to be separate routes. Keep the + // links working by redirecting onto the matching tab. 
{ path: 'auth-log', - component: () => import('@/views/admin/AuthLogView.vue'), + redirect: { path: '/admin/logs', query: { tab: 'security' } }, + }, + { + path: 'audit', + redirect: { path: '/admin/logs', query: { tab: 'audit' } }, }, { path: 'change-requests', @@ -488,7 +499,7 @@ router.beforeEach(async (to) => { 'oauth-api:read', 'login-provider:read', 'realm:read', 'realm-settings:read', - 'auth-log:read', 'session:read', 'observability:read', 'asset:read', + 'auth-log:read', 'audit-log:read', 'session:read', 'observability:read', 'asset:read', 'app:read', ] if (!ADMIN_PERMS.some((p) => authStore.hasPermission(p))) { diff --git a/src/frontend-vue/src/views/admin/AdminLogsView.vue b/src/frontend-vue/src/views/admin/AdminLogsView.vue new file mode 100644 index 00000000..a10c0aa8 --- /dev/null +++ b/src/frontend-vue/src/views/admin/AdminLogsView.vue @@ -0,0 +1,80 @@ + + + + + diff --git a/src/frontend-vue/src/views/admin/AdminObservabilityView.vue b/src/frontend-vue/src/views/admin/AdminObservabilityView.vue index a0c33f72..d227c87e 100644 --- a/src/frontend-vue/src/views/admin/AdminObservabilityView.vue +++ b/src/frontend-vue/src/views/admin/AdminObservabilityView.vue @@ -34,8 +34,19 @@ interface ActivityItem { Tags: Record } +interface ErrorItem { + Timestamp: string + Realm: string + Level: string + Message: string + Exception: string | null + SourceContext: string | null + TraceId: string | null +} + const snapshot = ref(null) const activity = ref([]) +const errors = ref([]) const lastUpdate = ref(null) let driftRefreshHandle: ReturnType | null = null @@ -47,16 +58,23 @@ let driftRefreshHandle: ReturnType | null = null // correctly aligned; counts are kept fresh by live events in between. 
async function refreshSnapshot() { try { - const [snap, act] = await Promise.all([ + const [snap, act, errs] = await Promise.all([ http.addPath('snapshot').get(), http.addPath('activity').setQueryParameter('limit', '50').get(), + http.addPath('errors').setQueryParameter('limit', '50').get(), ]) snapshot.value = snap activity.value = act + errors.value = errs lastUpdate.value = new Date() } catch { /* swallow — keep previous values rather than blink */ } } +function applyLiveError(ev: ErrorItem) { + // Prepend to the error feed (cap at 50). Bounded server-side per realm too. + errors.value = [ev, ...errors.value].slice(0, 50) +} + function applyLiveEvent(ev: ActivityItem) { // Prepend to feed (cap at 50). activity.value = [ev, ...activity.value].slice(0, 50) @@ -87,6 +105,13 @@ onMounted(() => { error: (err) => console.error('[observability] stream error', err), }) }, 'AdminObservabilityView.Observability.Subscribe') + + signalr.runOnEveryReconnect(() => { + signalr.stream('Observability.LogsSubscribe').subscribe({ + next: applyLiveError, + error: (err) => console.error('[observability] logs stream error', err), + }) + }, 'AdminObservabilityView.Observability.LogsSubscribe') }) onUnmounted(() => { @@ -184,6 +209,12 @@ function formatTime(iso: string): string { return d.toLocaleTimeString(language.value, { hour12: false }) } +function errorLevelVariant(level: string): 'error' | 'warning' | 'neutral' { + if (level === 'Error' || level === 'Fatal') return 'error' + if (level === 'Warning') return 'warning' + return 'neutral' +} + const toneClasses: Record = { positive: 'kpi-positive', warning: 'kpi-warning', @@ -253,6 +284,30 @@ const toneClasses: Record = { {{ t('admin.observability.empty', {}, 'No events in the rolling window.') }} + + + +
+
{{ t('admin.observability.errorFeed', {}, 'Recent errors') }}
+
+ {{ t('admin.observability.errorFeedHint', {}, 'Application errors on this realm') }} +
+
+
    +
  • + {{ formatTime(item.Timestamp) }} + {{ item.Level }} +
    +
    {{ item.Message }}
    +
    {{ item.Exception }}
    +
    {{ item.SourceContext }}
    +
    +
  • +
+
+ {{ t('admin.observability.errorsEmpty', {}, 'No errors captured.') }} +
+
@@ -386,4 +441,48 @@ const toneClasses: Record = { text-align: center; color: var(--coar-text-neutral-secondary); } + +.error-card { + flex: 0 0 auto; + max-height: 16rem; +} + +.error-item { + display: grid; + grid-template-columns: 80px auto 1fr; + gap: 0.75rem; + align-items: start; + padding: 0.45rem 0; + border-bottom: 1px solid var(--coar-border-neutral-secondary); + font-size: 0.85rem; +} + +.error-item:last-child { + border-bottom: 0; +} + +.error-body { + display: flex; + flex-direction: column; + gap: 0.15rem; + min-width: 0; +} + +.error-message { + color: var(--coar-text-neutral-primary); + word-break: break-word; +} + +.error-exception { + color: var(--coar-text-semantic-error, #dc2626); + font-family: var(--coar-font-mono, ui-monospace, monospace); + font-size: 0.78rem; + word-break: break-word; +} + +.error-source { + color: var(--coar-text-neutral-secondary); + font-family: var(--coar-font-mono, ui-monospace, monospace); + font-size: 0.72rem; +} diff --git a/src/frontend-vue/src/views/admin/AdminView.vue b/src/frontend-vue/src/views/admin/AdminView.vue index 8cae7ed2..cbbaccaf 100644 --- a/src/frontend-vue/src/views/admin/AdminView.vue +++ b/src/frontend-vue/src/views/admin/AdminView.vue @@ -79,7 +79,7 @@ const sections = computed(() => [ { label: 'admin.apps.title', icon: 'layout-grid', to: '/admin/apps', requirePermissions: ['app:read'] }, { label: 'admin.realms.title', icon: 'globe', to: '/admin/realms', requirePermissions: ['realm:read'] }, { label: 'admin.realmSettings.title', icon: 'sliders-horizontal', to: '/admin/realm-settings', requirePermissions: ['realm-settings:read'] }, - { label: 'admin.authLog.title', icon: 'scroll-text', to: '/admin/auth-log', requirePermissions: ['auth-log:read'] }, + { label: 'admin.logs.title', icon: 'scroll-text', to: '/admin/logs', requirePermissions: ['auth-log:read', 'audit-log:read'] }, { label: 'admin.scheduledJobs.title', icon: 'clock', to: '/admin/scheduled-jobs', requirePermissions: 
['scheduled-job:read'] }, { label: 'admin.changeRequests.title', icon: 'inbox', to: '/admin/change-requests', requirePermissions: ['user:write'] }, ], diff --git a/src/frontend-vue/src/views/admin/AuditLogView.vue b/src/frontend-vue/src/views/admin/AuditLogView.vue new file mode 100644 index 00000000..461f42c7 --- /dev/null +++ b/src/frontend-vue/src/views/admin/AuditLogView.vue @@ -0,0 +1,116 @@ + + + + + diff --git a/src/frontend-vue/src/views/admin/AuthLogView.vue b/src/frontend-vue/src/views/admin/AuthLogView.vue index 73393e5c..a2775686 100644 --- a/src/frontend-vue/src/views/admin/AuthLogView.vue +++ b/src/frontend-vue/src/views/admin/AuthLogView.vue @@ -1,91 +1,90 @@ @@ -99,14 +98,22 @@ const gridBuilder = CoarGridBuilder.create() elevated > @@ -114,10 +121,10 @@ const gridBuilder = CoarGridBuilder.create()