wundergraph · jensneuse · Apr 20, 2026 · Apr 23, 2026 · Apr 23, 2026 · Apr 23, 2026
@@ -27,3 +27,11 @@ go.work.sum
 
 # Serena
 .serena/
+/.playwright-mcp
+
+# PR review workspace — local-only, never commit
+/TODO.md
+/DONE.md
+/FEEDBACK.md
+/COMMENTS.md
+/docs/superpowers/
@@ -184,6 +184,51 @@ docker-build-minikube: docker-build-local
 
 	rm -f mk-*.tar
 
+demo-compose:
+	cd demo && $(MAKE) compose-cache
+
+DEMO_STARTUP_ATTEMPTS ?= 60
+DEMO_STARTUP_SLEEP ?= 0.5
+
+demo:
+	@echo "Composing subgraph schemas..."
+	cd demo && $(MAKE) compose-cache
+	@echo "Starting subgraphs and router..."
+	@echo "Playground will be at http://localhost:3002/"
+	@set -e; \
+	cd demo && go run cmd/all/main.go & \
+	pid=$$!; \
+	trap 'kill $$pid 2>/dev/null || true' EXIT INT TERM HUP; \
+	for p in 4012 4013 4014; do \
+		for i in $$(seq 1 $(DEMO_STARTUP_ATTEMPTS)); do \
+			nc -z 127.0.0.1 $$p && break; \
+			sleep $(DEMO_STARTUP_SLEEP); \
+		done; \
+		nc -z 127.0.0.1 $$p || { echo "subgraph $$p did not start" >&2; exit 1; }; \
+	done; \
+	cd router && go run cmd/router/main.go --config ../demo/router-cache.yaml
+
+benchmark-cache-demo:
+	pnpm dlx tsx benchmark/scripts/run_suite.ts --all \
+		$(if $(VUS),--vus $(VUS),) \
+		$(if $(DURATION),--duration $(DURATION),) \
+		$(if $(RAMP_UP),--ramp-up $(RAMP_UP),) \
+		$(if $(RAMP_DOWN),--ramp-down $(RAMP_DOWN),)
+
+benchmark-cache-demo-scenario:
+	@if [ -z "$(SCENARIO)" ]; then \
+		echo "Usage: make benchmark-cache-demo-scenario SCENARIO=<scenario_name>"; \
+		exit 1; \
+	fi
+	pnpm dlx tsx benchmark/scripts/run_suite.ts --scenario $(SCENARIO) \
+		$(if $(VUS),--vus $(VUS),) \
+		$(if $(DURATION),--duration $(DURATION),) \
+		$(if $(RAMP_UP),--ramp-up $(RAMP_UP),) \
+		$(if $(RAMP_DOWN),--ramp-down $(RAMP_DOWN),)
+
+benchmark-cache-demo-validate:
+	pnpm dlx tsx benchmark/scripts/run_suite.ts validate
+
 run-subgraphs-local:
 	cd demo && go run cmd/all/main.go
 

@@ -0,0 +1,185 @@
+# PR #2777 Review Action Plan
+
+**PR:** https://github.com/wundergraph/cosmo/pull/2777 — entity caching (~40k lines).
+**Reviewers processed:** CodeRabbit bot (37 findings) + SkArchon (5 inline comments).
+**Already addressed by commit `4bf4040f0`:** 25 findings (not repeated here).
+**Second opinion:** codex (0.121.0) — agreed on ignores, escalated two items to blockers.
+
+Legend:
+- **FIX / BLOCKER** — must land before merge.
+- **FIX** — should land before merge; small scope.
+- **IGNORE** — rejected with reason.
+
+---
+
+## BLOCKERS (codex escalation — must fix before merge)
+
+### B1. Fuzz tests must assert a single expected outcome
+*Finding [19] — `composition/tests/v1/directives/entity-cache-fuzz.test.ts` L160-197, 262-288, 412-488.*
+
+Current pattern: `if (result.success) { ... } // if it errored, that's fine`.
+A fuzz suite that accepts both success and failure catches no regressions.
+
+**Action:** for each flagged case, pick the correct expected outcome
+(success with specific config, or failure with specific error code) and lock it in.
+If the current behavior is underspecified, decide the spec first, then assert.
+
+### B2. Parameterize cache-layer test harness (L1-only / L2-only / both)
+*Finding [SkArchon L164, L2184] — `router-tests/entity_caching/harness_test.go:216`.*
+
+`entityCachingOptions(cache)` hard-codes L1+L2 enabled.
+Tests labeled "L1/…" or "L2/…" currently get both layers.
+Any L1-only assertion that passes today may be silently riding on L2.
+This is a cache-correctness PR; ambiguous layer isolation is a real defect.
+
+**Action:**
+1. Add helpers `entityCachingL1OnlyOptions(cache)` and `entityCachingL2OnlyOptions(cache)`
+   mirroring the existing `entityCachingOptions`.
+2. Audit every subtest under `L1/…` and `L2/…` prefixes in
+   `entity_caching_test.go` and switch to the matching helper.
+3. Run the suite under `-race -count=3` after the switch to catch any test
+   that was relying on the other layer.
+
+### B3. Add inverse "L1 disabled → N calls" test for dedupe claims
+*Finding [SkArchon L2235].*
+
+Whenever a test asserts "L1 dedupes to 1 call", add a sibling test with L1
+disabled asserting the subgraph sees the expected 3 calls.
+Prevents false positives if the planner ever starts merging calls for an
+unrelated reason.
+
+**Action:** under each `L1/deduplicates…` subtest, add a companion
+`L1-disabled/…` case using `entityCachingL2OnlyOptions` (or no entity caching)
+that locks in the un-deduped count.
+
+---
+
+## FIX (small, low risk)
+
+### F1. Align `demo/go.mod` OpenTelemetry to router's pattern
+*Finding [1] — codex refinement.*
+
+Router and router-tests pin `go.opentelemetry.io/otel` and all companion
+packages (`otel`, `otel/sdk`, `otel/sdk/metric`, `otel/trace`, `otel/metric`)
+to `require v1.39.0` with `replace → v1.28.0`.
+`demo/go.mod` drifts at `v1.36.0` with no replace.
+CodeRabbit's CVE callout (PATH hijack) is real but negligible for a demo;
+the real fix is consistency with router.
+A repo-wide jump to `v1.43.0` is out of scope for this PR.
+
+**Action:** bump `demo/go.mod` otel require-block to `v1.39.0` and add the
+same `replace` directive block as `router/go.mod:191+`.
+Run `go mod tidy` in `demo/`.
+Leave `router/` and `router-tests/` alone.
+
+### F2. Shell script timeouts and shutdown correctness
+*Findings [6][7][8] — benchmark harness scripts.*
+
+- `benchmark/scripts/stop_stack.sh:13-17` — replace bare `wait "${pid}"`
+  (no-op on non-child PIDs from `start_new_session=True`) with a
+  `kill -0` polling loop (5s timeout) then SIGKILL fallback, THEN
+  `rm -f pid_file`.
+- `benchmark/scripts/capture_pprof.sh:9-12` — add
+  `--connect-timeout 2 --max-time $((PPROF_SECONDS+15))` to both curl calls.
+- `benchmark/scripts/wait_ready.sh:6-14` — add
+  `--connect-timeout 1 --max-time 2` to each probe and
+  `timeout 2 docker exec …` around the redis ping.
+
+### F3. Validate CLI options in `benchmark/scripts/run_suite.ts`
+*Finding [37] L65-87.*
+
+`--vus` can silently become `NaN`; missing `--duration` / `--ramp-up` /
+`--ramp-down` values error deep in the pipeline.
+
+**Action:** parse and validate each flag up front; reject with a clear
+usage message.
+Add a unit test for the option parser if one doesn't exist.
+
+### F4. Documentation cleanups (trivial, batch-fix)
+*Findings [28][29][31][35].*
+
+- `composition/AGENTS.md:60,63,68` — escape `@openfed\_\_…`
+  (currently renders as `@openfed**…`).
+- `docs/entity-caching/ENTITY_CACHING_DEMO.md:5,90,128,159,229,260` —
+  add fence languages (`text` is fine for diagrams).
+- `demo/pkg/subgraphs/cachegraph/subgraph/data.go:146` — the
+  "metric data" comment precedes `recommendedArticlesByViewer`, not metrics.
+  Either delete the comment or move it to line 154 above `metricsData`.
+- `docs/entity-caching/directives.md:13-15` — the naming-principle
+  paragraph says entity caching uses unprefixed directives but the
+  whole doc uses `@openfed__…`.
+  Reword to match the actual surface.
+
+### F5. Simplify or delete the vacuous L1-dedupe test
+*Finding [SkArchon L698-L699] — `entity_caching_test.go:698` `L1/deduplicates repeated entity loads`.*
+
+The test's own 20-line comment (added in `4bf4040f0`) openly admits the
+assertion is vacuous — the planner merges identical entity fetches
+regardless of L1 state.
+SkArchon flagged both "comment too verbose" and "test doesn't add value";
+they're the same observation.
+
+**Action:** **delete the test** and cite `request_scoped_nested_dedup`
+(coordinate L1) and the new B2 L1-only / L2-only harness split as the
+real coverage.
+Keeping a test that admits it proves nothing is worse than removing it.
+(If there's product reason to keep the query-shape pinned, move the
+assertion into an L2-only test where the L1 path is a no-op and the
+assertion is meaningful.)
+
+---
+
+## IGNORE (with rationale)
+
+### I1. CodeRabbit [3] — "use `interface` for warning params object shapes"
+*`composition/src/v1/warnings/params.ts:14-53`.*
+
+Local convention in `composition/` is `export type X = { ... }` for object
+shapes.
+`composition/src/router-configuration/types.ts` has ~15 `export type` shapes
+and zero `export interface`.
+Codex confirmed.
+CodeRabbit's guidance is a generic TS rule that contradicts local style.
+
+### I2. CodeRabbit [4] — "add `@requestScoped` to both `currentViewer` fields in benchmark query"
+*`benchmark/queries/request_scoped_viewer_articles.graphql:2-13`.*
+
+`@openfed__requestScoped` is declared `on FIELD_DEFINITION` in the subgraph
+schema (`demo/pkg/subgraphs/viewer/subgraph/schema.graphqls:7`).
+Clients do not annotate operations with it.
+The benchmark query already exercises coordinate L1 via the schema-side
+declaration on `Query.currentViewer` and `Personalized.currentViewer`.
+Codex confirmed.
+CodeRabbit misread the directive's locations.
+
+### I3. CodeRabbit [30] — "use hyphenated `proto-generated`"
+*`CLAUDE.md:111`.*
+
+Grammatical nit in a guidance file authored by the repo owner.
+No functional impact; editorial freedom.
+
+---
+
+## Summary & merge gate
+
+Must land before merge: **B1, B2, B3** (3 items, all test-quality).
+Should land before merge: **F1, F2, F3, F4, F5** (5 items, mostly trivial).
+Ignored: **I1, I2, I3** (3 items).
+
+Total already-fixed: **25** (by `4bf4040f0`).
+Total remaining work: **8 actionable items**, of which 3 are blockers
+focused on the one systemic weakness codex flagged — weak test isolation
+between cache layers and under-specified fuzz expectations.
+Everything else is cleanup.
+
+## Suggested commit ordering
+
+1. `test(entity-caching): add L1-only / L2-only option helpers` (B2 helpers only).
+2. `test(entity-caching): switch L1/L2 subtests to layer-isolated options` (B2 audit).
+3. `test(entity-caching): add inverse L1-disabled assertions` (B3).
+4. `test(composition): lock single expected outcome in entity-cache fuzz` (B1).
+5. `test(entity-caching): delete vacuous L1-dedupe test` (F5).
+6. `chore(demo): align otel packages with router's v1.39.0 + v1.28.0 replace` (F1).
+7. `chore(benchmark): bound shell probes and shutdowns with timeouts` (F2).
+8. `chore(benchmark): validate run_suite.ts CLI options up front` (F3).
+9. `docs(entity-caching): misc doc cleanups (AGENTS escaping, fences, comment placement, naming)` (F4).
@@ -0,0 +1,5 @@
+.run/
+.tmp-metrics/
+.tmp-pprof/
+results/*
+!results/.gitkeep
@@ -0,0 +1,114 @@
+# Cache Demo Benchmark Suite
+
+Local benchmark harness for the cache demo on `localhost:3002`, using:
+
+- the existing `cache-demo` subgraphs
+- a dedicated Redis Docker container for L2 cache storage on `localhost:6399`
+- k6 for request load
+- router Prometheus and pprof for runtime capture
+
+## Prerequisites
+
+- `go` with the repo’s expected toolchain
+- `pnpm`
+- `k6`
+- `docker`
+- free local ports:
+  - `3002`
+  - `4012`
+  - `4013`
+  - `4014`
+  - `6060`
+  - `8088`
+  - `6399`
+
+## Key Files
+
+- `benchmark/router-cache.redis.yaml`: Redis-backed router config for the benchmark
+- `benchmark/scenarios/cache-demo.json`: scenario manifest
+- `benchmark/queries/*.graphql`: canonical benchmark operations
+- `benchmark/fixtures/*.response.json`: uncached router response fixtures
+- `benchmark/k6/cache_demo.js`: k6 runner with exact response assertion
+
+## Scenarios
+
+- `article_simple`
+- `articles_by_ids_batch`
+- `listing_composite_key`
+- `venue_nested_key`
+- `user_profile_header_sensitive`
+- `catalogs_partial_load`
+- `request_scoped_viewer_articles`
+- `viewer_articles_deep_nested`
+
+## Auth Profiles
+
+The demo uses fake bearer tokens:
+
+- `alice` -> `Bearer token-alice`
+- `bob` -> `Bearer token-bob`
+- `charlie` -> `Bearer token-charlie`
+
+Auth-sensitive scenarios must not fall back to anonymous requests.
+
+## Commands
+
+Validate the manifest and checked-in fixtures:
+
+```bash
+make benchmark-cache-demo-validate
+```
+
+Run the full suite with default load settings:
+
+```bash
+make benchmark-cache-demo
+```
+
+Run one scenario with default load settings:
+
+```bash
+make benchmark-cache-demo-scenario SCENARIO=article_simple
+```
+
+Run one scenario with direct control over k6 stages:
+
+```bash
+pnpm dlx tsx benchmark/scripts/run_suite.ts \
+  --scenario article_simple \
+  --vus 10 \
+  --duration 30s \
+  --ramp-up 5s \
+  --ramp-down 5s
+```
+
+## Output Layout
+
+Result bundles are written under:
+
+```text
+benchmark/results/<timestamp>/<scenario>/<mode>/
+```
+
+Each mode directory contains:
+
+- `summary.json`
+- `k6-summary.json`
+- `metrics-before.prom`
+- `metrics-after.prom`
+- `metrics-delta.json`
+- `redis-info-before.txt`
+- `redis-info-after.txt`
+- `redis-docker-stats-before.json`
+- `redis-docker-stats-after.json`
+- `equivalence.json`
+- `pprof/router_cpu.pb.gz`
+- `pprof/router_heap.pb.gz`
+
+`summary.json` also records the warmup request count, k6 stage config, parsed k6 summary, and selected Redis INFO and Docker stats values so downstream interpretation does not need to scrape raw artifacts first.
+
+## Notes
+
+- The harness owns the stack. If the required ports are already in use, startup fails rather than benchmarking against a dirty environment.
+- L2 cache storage is intentionally externalized to Redis so router memory measurements do not include the full L2 object footprint.
+- The suite always performs deterministic uncached equivalence checks before load generation and serial warmup requests before each mode run.
@@ -0,0 +1 @@
+{"data":{"article":{"id":"1","title":"Introduction to GraphQL Caching","authorName":"Alice"}}}
@@ -0,0 +1 @@
+{"data":{"articlesByIds":[{"id":"1","title":"Introduction to GraphQL Caching","authorName":"Alice"},{"id":"2","title":"Advanced Federation Patterns","authorName":"Bob"},{"id":"3","title":"Cache Invalidation Strategies","authorName":"Charlie"}]}}
@@ -0,0 +1 @@
+{"data":{"catalogs":[{"id":"c1","name":"Electronics","description":"Consumer electronics, gadgets, and accessories.","lastUpdated":"2025-03-15T08:00:00Z","itemCount":342},{"id":"c2","name":"Books","description":"Fiction, non-fiction, technical books, and audiobooks.","lastUpdated":"2025-03-20T12:00:00Z","itemCount":1205},{"id":"c3","name":"Clothing","description":"Men's, women's, and children's apparel.","lastUpdated":"2025-03-25T16:00:00Z","itemCount":567}]}}
@@ -0,0 +1 @@
+{"data":{"listing":{"sellerId":"s1","sku":"WIDGET-01","title":"Premium Widget","price":29.99,"currency":"USD","inStock":true}}}
@@ -0,0 +1 @@
+{"data":{"currentViewer":{"id":"v1","name":"Alice","email":"alice@example.com"},"articles":[{"id":"1","title":"Introduction to GraphQL Caching","currentViewer":{"id":"v1","name":"Alice"},"viewCount":12453,"rating":4.7},{"id":"2","title":"Advanced Federation Patterns","currentViewer":{"id":"v1","name":"Alice"},"viewCount":8921,"rating":4.3},{"id":"3","title":"Cache Invalidation Strategies","currentViewer":{"id":"v1","name":"Alice"},"viewCount":15678,"rating":4.9},{"id":"4","title":"Performance Tuning with Entity Caching","currentViewer":{"id":"v1","name":"Alice"},"viewCount":6234,"rating":4.1}]}}
@@ -0,0 +1 @@
+{"data":{"userProfile":{"id":"u1","username":"alice","role":"admin","email":"alice@example.com"}}}
@@ -0,0 +1 @@
+{"data":{"userProfile":{"id":"u2","username":"bob","role":"editor","email":"bob@example.com"}}}
@@ -0,0 +1 @@
+{"data":{"venue":{"address":{"id":"v1"},"name":"Grand Conference Hall","capacity":500,"city":"Berlin"}}}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"data":{"article":{"id":"1","title":"Introduction to GraphQL Caching","authorName":"Alice"}}}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"data":{"articlesByIds":[{"id":"1","title":"Introduction to GraphQL Caching","authorName":"Alice"},{"id":"2","title":"Advanced Federation Patterns","authorName":"Bob"},{"id":"3","title":"Cache Invalidation Strategies","authorName":"Charlie"}]}}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"data":{"catalogs":[{"id":"c1","name":"Electronics","description":"Consumer electronics, gadgets, and accessories.","lastUpdated":"2025-03-15T08:00:00Z","itemCount":342},{"id":"c2","name":"Books","description":"Fiction, non-fiction, technical books, and audiobooks.","lastUpdated":"2025-03-20T12:00:00Z","itemCount":1205},{"id":"c3","name":"Clothing","description":"Men's, women's, and children's apparel.","lastUpdated":"2025-03-25T16:00:00Z","itemCount":567}]}}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"data":{"listing":{"sellerId":"s1","sku":"WIDGET-01","title":"Premium Widget","price":29.99,"currency":"USD","inStock":true}}}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"data":{"currentViewer":{"id":"v1","name":"Alice","email":"alice@example.com"},"articles":[{"id":"1","title":"Introduction to GraphQL Caching","currentViewer":{"id":"v1","name":"Alice"},"viewCount":12453,"rating":4.7},{"id":"2","title":"Advanced Federation Patterns","currentViewer":{"id":"v1","name":"Alice"},"viewCount":8921,"rating":4.3},{"id":"3","title":"Cache Invalidation Strategies","currentViewer":{"id":"v1","name":"Alice"},"viewCount":15678,"rating":4.9},{"id":"4","title":"Performance Tuning with Entity Caching","currentViewer":{"id":"v1","name":"Alice"},"viewCount":6234,"rating":4.1}]}}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"data":{"userProfile":{"id":"u1","username":"alice","role":"admin","email":"alice@example.com"}}}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"data":{"userProfile":{"id":"u2","username":"bob","role":"editor","email":"bob@example.com"}}}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"data":{"venue":{"address":{"id":"v1"},"name":"Grand Conference Hall","capacity":500,"city":"Berlin"}}}