From 7761077504b6fddc90a4618ad4ffe77dfed493a6 Mon Sep 17 00:00:00 2001 From: Jens Neuse Date: Fri, 6 Mar 2026 09:37:32 +0100 Subject: [PATCH 1/2] docs: comprehensive caching and resolve package documentation Create three well-structured documentation files for the entity caching system and resolve package: 1. **v2/pkg/engine/resolve/CLAUDE.md** - Full resolve package reference covering the resolution pipeline (Resolver, Loader, Resolvable) and entity caching internals. Single file because caching is embedded in the fetch execution flow. 2. **ENTITY_CACHING_INTEGRATION.md** - Router integration guide with complete public APIs, configuration options, cache key formats, invalidation mechanisms, analytics, and a full end-to-end example. Another agent can fully integrate entity caching using only this file. 3. **CLAUDE.md (root)** - High-level repo overview with package map, data flow, and links to deep references. Replaces entity-caching-specific content with concise architecture documentation. Also delete execution/engine/CLAUDE.md (cache log rules merged into resolve/CLAUDE.md). Co-Authored-By: Claude Haiku 4.5 --- CLAUDE.md | 366 +++-------------- ENTITY_CACHING_INTEGRATION.md | 680 ++++++++++++++++++++++++++++++++ execution/engine/CLAUDE.md | 25 -- v2/pkg/engine/resolve/CLAUDE.md | 589 +++++++++++++++++++++++++++ 4 files changed, 1329 insertions(+), 331 deletions(-) create mode 100644 ENTITY_CACHING_INTEGRATION.md delete mode 100644 execution/engine/CLAUDE.md create mode 100644 v2/pkg/engine/resolve/CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md index b276d43031..f032cd8e10 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,313 +1,67 @@ -# Entity Caching Reference +# graphql-go-tools -GraphQL Federation entity caching system with L1 (per-request) and L2 (external) caches. +GraphQL Router / API Gateway framework for Go. Federation-first, with query planning, parallel resolution, and entity caching. 
-## Architecture Overview +Module: `github.com/wundergraph/graphql-go-tools` (Go 1.25, go.work workspace) -| Cache | Storage | Scope | Key Fields | Thread Safety | -|-------|---------|-------|------------|---------------| -| **L1** | `sync.Map` in Loader | Single request | `@key` only | sync.Map | -| **L2** | External (LoaderCache) | Cross-request | `@key` only | Atomic stats | +## Data Flow -**Key Principle**: Both L1 and L2 use only `@key` fields for stable entity identity. - -## Key Files - -| File | Purpose | -|------|---------| -| `v2/pkg/engine/resolve/loader.go` | L1/L2 cache core: `prepareCacheKeys`, `tryL1CacheLoad`, `tryL2CacheLoad`, `populateL1Cache` | -| `v2/pkg/engine/resolve/loader_json_copy.go` | Shallow copy for self-referential entities | -| `v2/pkg/engine/resolve/caching.go` | `RenderCacheKeys`, `EntityQueryCacheKeyTemplate`, `RootQueryCacheKeyTemplate` | -| `v2/pkg/engine/resolve/context.go` | `CachingOptions`, `CacheStats`, tracking methods | -| `v2/pkg/engine/resolve/fetch.go` | `FetchCacheConfiguration`, `FetchInfo.ProvidesData` | -| `v2/pkg/engine/plan/visitor.go` | `configureFetchCaching()`, `isEntityBoundaryField` | -| `v2/pkg/engine/plan/federation_metadata.go` | `EntityCacheConfiguration`, `RootFieldCacheConfiguration` | -| `v2/pkg/engine/datasource/graphql_datasource/graphql_datasource.go` | `buildCacheKeyVariable()`, cache key template building | -| `execution/engine/config_factory_federation.go` | `SubgraphCachingConfig`, per-subgraph configuration | -| `execution/engine/federation_caching_test.go` | E2E caching tests | -| `v2/pkg/engine/resolve/l1_cache_test.go` | L1 cache unit tests | - -## Core Types - -### Cache Key Templates -```go -// Entity caching - same @key-only keys for both L1 and L2 -type EntityQueryCacheKeyTemplate struct { - Keys *ResolvableObjectVariable // @key fields only (no @requires) -} -func (e *EntityQueryCacheKeyTemplate) RenderCacheKeys(a arena.Arena, ctx *Context, items []*astjson.Value, prefix string) 
([]*CacheKey, error) - -// Root field caching - same template for L1 and L2 -type RootQueryCacheKeyTemplate struct { - RootFields []QueryField // TypeName + FieldName + Args -} ``` - -### Configuration Types -```go -// Per-subgraph caching config (explicit opt-in) -type SubgraphCachingConfig struct { - SubgraphName string - EntityCaching plan.EntityCacheConfigurations // For _entities queries - RootFieldCaching plan.RootFieldCacheConfigurations // For root queries -} - -type EntityCacheConfiguration struct { - TypeName string // e.g., "User" - CacheName string - TTL time.Duration - IncludeSubgraphHeaderPrefix bool -} - -type RootFieldCacheConfiguration struct { - TypeName string // e.g., "Query" - FieldName string // e.g., "topProducts" - CacheName string - TTL time.Duration - IncludeSubgraphHeaderPrefix bool -} -``` - -### Cache Stats (Thread Safety) -```go -type CacheStats struct { - L1Hits int64 // Main thread only (non-atomic) - L1Misses int64 // Main thread only (non-atomic) - L2Hits *atomic.Int64 // Goroutine-safe (atomic) - L2Misses *atomic.Int64 // Goroutine-safe (atomic) -} -``` - -## Enabling Caching - -### Runtime Options -```go -ctx.ExecutionOptions.Caching = CachingOptions{ - EnableL1Cache: true, // Per-request entity cache - EnableL2Cache: true, // External cache -} -``` - -### Per-Subgraph Configuration (L2 only) -```go -subgraphCachingConfigs := engine.SubgraphCachingConfigs{ - { - SubgraphName: "products", - RootFieldCaching: plan.RootFieldCacheConfigurations{ - {TypeName: "Query", FieldName: "topProducts", CacheName: "default", TTL: 30 * time.Second}, - }, - }, - { - SubgraphName: "accounts", - EntityCaching: plan.EntityCacheConfigurations{ - {TypeName: "User", CacheName: "default", TTL: 30 * time.Second}, - }, - }, -} - -opts := []engine.FederationEngineConfigFactoryOption{ - engine.WithSubgraphEntityCachingConfigs(subgraphCachingConfigs), -} -``` - -## Cache Flow - -### Sequential Execution (`tryCacheLoad`) -1. 
`prepareCacheKeys()` - Generate L1 and L2 cache keys -2. `tryL1CacheLoad()` - Check L1 (main thread) -3. `tryL2CacheLoad()` - Check L2 (main thread) -4. Fetch if needed, then `populateL1Cache()` and `updateL2Cache()` - -### Parallel Execution (`resolveParallel`) -1. **Main thread**: `prepareCacheKeys()` + `tryL1CacheLoad()` for all nodes -2. **Goroutines**: `tryL2CacheLoad()` + fetch via `loadFetchL2Only()` -3. **Main thread**: Merge results, populate L1 cache - -**Rationale**: L1 is cheap (in-memory), check on main thread to skip goroutine work early. L2/fetch are expensive, run in parallel. - -## Self-Referential Entity Fix - -**Problem**: When `User.friends` returns the same `User` entity, L1 cache causes pointer aliasing → stack overflow on merge. - -**Solution**: `shallowCopyProvidedFields()` in `loader_json_copy.go` creates copies based on `ProvidesData` schema. - -```go -// In tryL1CacheLoad: -ck.FromCache = l.shallowCopyProvidedFields(cachedValue, info.ProvidesData) -``` - -## ProvidesData and Validation - -`FetchInfo.ProvidesData` describes what fields a fetch provides. Used by: -- `validateItemHasRequiredData()` - Check if cached entity is complete -- `shallowCopyProvidedFields()` - Copy only required fields - -**Critical**: For nested entity fetches, `ProvidesData` must contain entity fields (`id`, `username`), NOT the parent field (`author`). - -## configureFetchCaching Logic - -```go -func configureFetchCaching(internal, external) FetchCacheConfiguration { - // 1. Always preserve CacheKeyTemplate for L1 - result := FetchCacheConfiguration{CacheKeyTemplate: external.Caching.CacheKeyTemplate} - - // 2. Check global disable - if v.Config.DisableEntityCaching { return result } - - // 3. 
Determine fetch type FIRST - if external.RequiresEntityFetch || external.RequiresEntityBatchFetch { - // Entity fetch: all rootFields same type, use first - entityTypeName := internal.rootFields[0].TypeName - cacheConfig := fedConfig.EntityCacheConfig(entityTypeName) - } else { - // Root field fetch: need exactly 1 rootField - if len(internal.rootFields) != 1 { return result } - cacheConfig := fedConfig.RootFieldCacheConfig(rootField.TypeName, rootField.FieldName) - } -} -``` - -## Unit Testing - -```go -// Standard test setup -ctrl := gomock.NewController(t) -defer ctrl.Finish() - -ds := NewMockDataSource(ctrl) -ds.EXPECT().Load(gomock.Any(), gomock.Any(), gomock.Any()). - DoAndReturn(func(ctx context.Context, headers any, input []byte) ([]byte, error) { - return []byte(`{"data":{...}}`), nil - }).Times(1) - -loader := &Loader{caches: map[string]LoaderCache{"default": cache}} - -// REQUIRED: Disable singleFlight for unit tests -ctx := NewContext(context.Background()) -ctx.ExecutionOptions.DisableSubgraphRequestDeduplication = true -ctx.ExecutionOptions.Caching = CachingOptions{EnableL1Cache: true, EnableL2Cache: true} - -// REQUIRED: Always use arena -ar := arena.NewMonotonicArena(arena.WithMinBufferSize(1024)) -resolvable := NewResolvable(ar, ResolvableOptions{}) -resolvable.Init(ctx, nil, ast.OperationTypeQuery) - -err := loader.LoadGraphQLResponseData(ctx, response, resolvable) -out := fastjsonext.PrintGraphQLResponse(resolvable.data, resolvable.errors) -``` - -### FakeLoaderCache -Test mock in `cache_load_test.go` with TTL support and operation logging. - -### Assertions - -**IMPORTANT**: Always use exact assertions in cache tests. Never use vague comparisons. 
- -```go -// GOOD: Exact values - always preferred -assert.Equal(t, 3, hitCount, "should have exactly 3 L1 hits") -assert.Equal(t, int64(12), l1HitsInt, "should have exactly 12 L1 hits") -assert.Equal(t, 2, accountsCalls, "should call accounts subgraph exactly twice") - -// BAD: Never use vague comparisons -assert.GreaterOrEqual(t, hitCount, 1) // DON'T DO THIS -assert.Greater(t, l1HitsInt, int64(0)) // DON'T DO THIS -assert.LessOrEqual(t, calls, 5) // DON'T DO THIS -``` - -Exact assertions catch regressions that vague assertions miss. If the expected value changes, update the test to reflect the new exact value. - -### Snapshot Comments - -**IMPORTANT**: Every event line in a `CacheAnalyticsSnapshot` assertion MUST have a brief comment explaining **why** that event occurred. Focus on causation, not field values. - -```go -// GOOD: explains the "why" -L2Reads: []resolve.CacheKeyEvent{ - {CacheKey: keyUser, Kind: resolve.CacheKeyMiss, ...}, // First request, L2 empty - {CacheKey: keyUser, Kind: resolve.CacheKeyHit, ...}, // Populated by Request 1 -}, - -// BAD: restates the field value -{CacheKey: keyUser, Kind: resolve.CacheKeyMiss, ...}, // this is a miss -``` - -## Federation Test Setup - -Test services: `accounts`, `products`, `reviews` in `execution/federationtesting/` - -### Testing Entity Caching vs @provides -```graphql -type Review { - # @provides - gateway trusts subgraph, NO entity resolution - author: User! @provides(fields: "username") - - # No @provides - gateway MUST resolve via _entities - # Use for testing L1/L2 caching - authorWithoutProvides: User! -} -``` - -### Run Tests -```bash -go test -run "TestL1Cache" ./v2/pkg/engine/resolve/... -v -go test -run "TestFederationCaching" ./execution/engine/... -v -go test -race ./execution/engine/... 
-v # Race detector -``` - -## astjson API Reference - -```go -// Create values on arena -astjson.ObjectValue(arena) -astjson.ArrayValue(arena) -astjson.StringValue(arena, string) -astjson.StringValueBytes(arena, []byte) -astjson.NumberValue(arena, string) -astjson.TrueValue(arena) -astjson.FalseValue(arena) -astjson.NullValue // Global constant (not a function) - -// Manipulate -value.Set(arena, key, val) -value.SetArrayItem(arena, idx, val) -value.Get(keys...) -value.GetArray() -value.GetStringBytes() -value.MarshalTo([]byte) -value.Type() // TypeNull, TypeTrue, TypeObject, etc. -``` - -## LoaderCache Interface - -```go -type LoaderCache interface { - Get(ctx context.Context, keys []string) ([]*CacheEntry, error) - Set(ctx context.Context, entries []*CacheEntry, ttl time.Duration) error - Delete(ctx context.Context, keys []string) error -} - -type CacheEntry struct { - Key string - Value []byte // JSON-encoded entity -} -``` - -## Always use exact assertions - -Use `assert.Equal` with exact expected values. Never use `Contains`, `GreaterOrEqual`, `LessOrEqual`, or any vague comparison. -For objects or slices, always compare against a fully defined expected value, not just a subset. - -```go -// CORRECT -assert.Equal(t, 3, len(log), "should have exactly 3 cache operations") -assert.Equal(t, 1, tracker.GetCount(host), "should call subgraph exactly once") -assert.Equal(t, int64(12), stats.L1Hits, "should have exactly 12 L1 hits") - -// WRONG — hides regressions -assert.GreaterOrEqual(t, len(log), 1) -assert.Greater(t, stats.L1Hits, int64(0)) -assert.Contains(t, log[0].Keys, expectedKey) +parse → normalize → validate → plan → resolve → response ``` -If the expected value changes due to a code change, update the test to the new exact value. 
\ No newline at end of file +## Package Map + +### Core (v2/pkg/) + +| Package | Purpose | +|---------|---------| +| `ast` | GraphQL AST representation | +| `astparser` | GraphQL parser (schema + operations) | +| `astnormalization` | AST normalization passes | +| `astvalidation` | Schema and query validation | +| `astvisitor` | AST visitor pattern for tree walking | +| `astprinter` | AST to string serialization | +| `asttransform` | AST transformations | +| `astimport` | AST import/merge utilities | +| `fastjsonext` | JSON manipulation extensions (astjson API) | +| `federation` | Federation composition utilities | +| `errorcodes` | Error code definitions | + +### Engine (v2/pkg/engine/) + +| Package | Purpose | +|---------|---------| +| `plan` | Query planning, federation metadata, cache configuration types | +| **`resolve`** | **Resolution engine: fetching, caching, rendering** → see [resolve/CLAUDE.md](v2/pkg/engine/resolve/CLAUDE.md) | +| `datasource/graphql_datasource` | GraphQL subgraph datasource adapter | +| `postprocess` | Response post-processing passes (L1 cache optimization, fetch tree building) | + +### Execution (execution/) + +| Package | Purpose | +|---------|---------| +| `engine` | Federation engine config factory (`SubgraphCachingConfig`, `WithSubgraphEntityCachingConfigs`), E2E tests | +| `federationtesting` | Test federation services: accounts, products, reviews | +| `graphql` | GraphQL execution utilities | + +## Key Architectural Decisions + +- **Federation-first**: designed for federated GraphQL with entity resolution and `@key`/`@provides`/`@requires` +- **Arena-based allocation**: JSON values live on arena memory (no GC pressure), released per-request +- **Parallel resolution**: fetch tree with Sequence/Parallel nodes, 4-phase parallel execution with L1/L2 caching +- **Two-pass rendering**: pre-walk (validate, collect errors) + print-walk (render JSON) + +## Entity Caching + +Two-level entity caching system (L1 per-request + L2 external). 
See: +- [v2/pkg/engine/resolve/CLAUDE.md](v2/pkg/engine/resolve/CLAUDE.md) — full resolve package reference (resolution pipeline + caching internals) +- [ENTITY_CACHING_INTEGRATION.md](ENTITY_CACHING_INTEGRATION.md) — router integration guide (public APIs, configuration, examples) + +## Testing Conventions + +- **Exact assertions only**: use `assert.Equal` with exact expected values, never `GreaterOrEqual`, `Contains`, or vague comparisons +- **Snapshot comments**: every event line in `CacheAnalyticsSnapshot` assertions must explain **why** that event occurred +- **Cache log rule**: every `ClearLog()` must have `GetLog()` + assertions before the next `ClearLog()` +- **Federation test services**: `accounts`, `products`, `reviews` in `execution/federationtesting/` +- Run: `go test ./v2/pkg/engine/resolve/... -v` and `go test ./execution/engine/... -v` diff --git a/ENTITY_CACHING_INTEGRATION.md b/ENTITY_CACHING_INTEGRATION.md new file mode 100644 index 0000000000..94f20e4ed3 --- /dev/null +++ b/ENTITY_CACHING_INTEGRATION.md @@ -0,0 +1,680 @@ +# Entity Caching Integration Guide + +This guide covers everything needed to integrate the entity caching system into a GraphQL Federation router. After reading this, you should be able to fully configure L1/L2 caching, implement a cache backend, set up invalidation, and collect analytics. + +## Overview + +The caching system has two levels: + +| Level | Storage | Scope | Applies To | Default | +|-------|---------|-------|-----------|---------| +| **L1** | In-memory `sync.Map` per request | Single request | Entity fetches only | Disabled | +| **L2** | External cache (Redis, etc.) | Cross-request with TTL | Entity + root field fetches | Disabled | + +Both levels are opt-in and disabled by default. L1 prevents redundant fetches for the same entity within a single request. L2 shares entity data across requests. + +**Key principle**: Cache keys use only `@key` fields for stable entity identity (never `@requires`). + +## 1. 
Implement the LoaderCache Interface + +To use L2 caching, implement the `LoaderCache` interface from `v2/pkg/engine/resolve`: + +```go +import "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" + +type LoaderCache interface { + // Get retrieves cache entries by keys. + // Returns a slice of the same length as keys. Use nil for cache misses. + // Called from goroutines during parallel resolution — must be thread-safe. + Get(ctx context.Context, keys []string) ([]*resolve.CacheEntry, error) + + // Set stores cache entries with a TTL. + // Called from goroutines during parallel resolution — must be thread-safe. + Set(ctx context.Context, entries []*resolve.CacheEntry, ttl time.Duration) error + + // Delete removes cache entries by keys. + // Called during cache invalidation (extension-based, mutation-based). + Delete(ctx context.Context, keys []string) error +} + +type CacheEntry struct { + Key string // Cache key string (JSON format) + Value []byte // JSON-encoded entity data + RemainingTTL time.Duration // Remaining TTL from cache (0 = unknown/not supported) +} +``` + +**Thread safety requirement**: `Get`, `Set`, and `Delete` may be called from multiple goroutines during parallel fetch execution. Your implementation must be safe for concurrent use. + +**RemainingTTL**: If your cache backend supports it, return the remaining TTL in `CacheEntry.RemainingTTL`. This is used for cache analytics (cache age tracking) and shadow mode staleness detection. Return 0 if not supported. + +## 2. Configure Per-Subgraph Caching + +### SubgraphCachingConfig + +Each subgraph can have independent caching configuration. 
Pass these via the factory option: + +```go +import ( + "github.com/wundergraph/graphql-go-tools/execution/engine" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/plan" +) + +subgraphCachingConfigs := engine.SubgraphCachingConfigs{ + { + SubgraphName: "accounts", // Must match SubgraphConfiguration.Name + EntityCaching: plan.EntityCacheConfigurations{...}, + RootFieldCaching: plan.RootFieldCacheConfigurations{...}, + MutationFieldCaching: plan.MutationFieldCacheConfigurations{...}, + MutationCacheInvalidation: plan.MutationCacheInvalidationConfigurations{...}, + SubscriptionEntityPopulation: plan.SubscriptionEntityPopulationConfigurations{...}, + }, +} + +factory := engine.NewFederationEngineConfigFactory( + ctx, + subgraphsConfigs, + engine.WithSubgraphEntityCachingConfigs(subgraphCachingConfigs), +) +config, err := factory.BuildEngineConfiguration() +``` + +### Entity Cache Configuration + +Controls L2 caching for entity types resolved via `_entities` queries: + +```go +plan.EntityCacheConfiguration{ + // TypeName is the entity type to cache (must match __typename from subgraph). + TypeName: "User", + + // CacheName identifies which LoaderCache instance to use. + // Multiple entity types can share a cache by using the same name. + CacheName: "default", + + // TTL specifies how long cached entities remain valid. + // Zero TTL means entries never expire (not recommended for production). + TTL: 60 * time.Second, + + // IncludeSubgraphHeaderPrefix controls whether forwarded headers affect cache keys. + // When true, cache keys include a hash of headers sent to the subgraph, + // ensuring different header configurations (e.g., different auth tokens) + // use separate cache entries. + IncludeSubgraphHeaderPrefix: true, + + // EnablePartialCacheLoad enables fetching only cache-missed entities. + // Default (false): any miss in a batch refetches ALL entities. + // When true: only missing entities are fetched, cached ones served directly. 
+ EnablePartialCacheLoad: false, + + // HashAnalyticsKeys controls whether entity keys are hashed or stored raw + // in cache analytics. When true, KeyHash is populated instead of KeyRaw. + HashAnalyticsKeys: false, + + // ShadowMode enables shadow caching: L2 reads/writes happen but cached data + // is never served. Fresh data is always fetched and compared against cache + // for staleness detection. L1 cache is unaffected. + ShadowMode: false, +} +``` + +### Root Field Cache Configuration + +Controls L2 caching for root query fields (e.g., `Query.topProducts`): + +```go +plan.RootFieldCacheConfiguration{ + TypeName: "Query", + FieldName: "topProducts", + CacheName: "default", + TTL: 30 * time.Second, + IncludeSubgraphHeaderPrefix: true, + + // EntityKeyMappings enables cache sharing between root fields and entity fetches. + // When set, the L2 cache key uses entity key format instead of root field format. + // Example: Query.user(id: "123") shares cache with User entity key {"id":"123"}. + EntityKeyMappings: []plan.EntityKeyMapping{ + { + EntityTypeName: "User", + FieldMappings: []plan.FieldMapping{ + { + EntityKeyField: "id", // @key field on User + ArgumentPath: []string{"id"}, // Root field argument name + }, + }, + }, + }, + + ShadowMode: false, +} +``` + +### Mutation Field Cache Configuration + +Controls whether entity fetches triggered by a mutation populate L2: + +```go +plan.MutationFieldCacheConfiguration{ + // Mutation field name + FieldName: "addReview", + + // By default, mutations skip L2 reads AND L2 writes. + // Set to true to allow entity fetches during this mutation to write to L2. 
+ EnableEntityL2CachePopulation: true, +} +``` + +**Mutation caching behavior**: +- Mutations **always skip L2 reads** (always fetch fresh from subgraph) +- Mutations **skip L2 writes by default** +- With `EnableEntityL2CachePopulation: true`, entity fetches triggered by this mutation **will write to L2** + +### Mutation Cache Invalidation Configuration + +Configures automatic L2 cache deletion after a mutation completes: + +```go +plan.MutationCacheInvalidationConfiguration{ + FieldName: "updateUser", + // EntityTypeName can be omitted — it's inferred from the mutation return type. + EntityTypeName: "User", +} +``` + +When the mutation returns an entity with `@key` fields, the corresponding L2 cache entry is deleted. + +### Subscription Entity Population Configuration + +Controls how subscription events update the L2 cache: + +```go +plan.SubscriptionEntityPopulationConfiguration{ + TypeName: "Product", + CacheName: "default", + TTL: 30 * time.Second, + IncludeSubgraphHeaderPrefix: true, + + // When true and the subscription only provides @key fields (no additional + // entity fields), DELETE the L2 cache entry on each event. + // When false (default), populate L2 with entity data from the event. + EnableInvalidationOnKeyOnly: false, +} +``` + +**Two modes**: +- **Populate** (default): subscription provides entity fields beyond `@key` → write to L2 +- **Invalidate** (`EnableInvalidationOnKeyOnly: true`): subscription provides only `@key` → delete from L2 + +## 3. 
Wire Caches into the Resolver + +Register your `LoaderCache` implementations in the `ResolverOptions`: + +```go +resolver := resolve.New(ctx, resolve.ResolverOptions{ + MaxConcurrency: 32, + + // Register named cache instances (referenced by CacheName in configs) + Caches: map[string]resolve.LoaderCache{ + "default": myRedisCache, + "fast": myInMemoryCache, + }, + + // Required for extension-based cache invalidation + // Maps subgraphName → entityTypeName → invalidation config + EntityCacheConfigs: map[string]map[string]*resolve.EntityCacheInvalidationConfig{ + "accounts": { + "User": { + CacheName: "default", + IncludeSubgraphHeaderPrefix: true, + }, + }, + }, + + // ... other options +}) +``` + +## 4. Enable Caching at Runtime + +Set caching options per-request on the execution context: + +```go +ctx := resolve.NewContext(context.Background()) +ctx.ExecutionOptions.Caching = resolve.CachingOptions{ + // Enable per-request in-memory entity cache + EnableL1Cache: true, + + // Enable external cross-request cache + EnableL2Cache: true, + + // Enable detailed cache analytics collection + EnableCacheAnalytics: true, + + // Optional: transform L2 cache keys (e.g., for tenant isolation) + L2CacheKeyInterceptor: func(ctx context.Context, key string, info resolve.L2CacheKeyInterceptorInfo) string { + tenantID := ctx.Value("tenant-id").(string) + return tenantID + ":" + key + }, +} +``` + +**L2CacheKeyInterceptor** receives: +```go +type L2CacheKeyInterceptorInfo struct { + SubgraphName string // e.g., "accounts" + CacheName string // e.g., "default" +} +``` + +The interceptor is applied **after** the subgraph header prefix. It does NOT affect L1 keys. + +## 5. 
Cache Key Format + +### Entity Keys + +Generated by `EntityQueryCacheKeyTemplate` from `@key` fields: +```json +{"__typename":"User","key":{"id":"123"}} +{"__typename":"Product","key":{"upc":"top-1"}} +{"__typename":"Order","key":{"id":"1","orgId":"acme"}} +``` + +### Root Field Keys + +Generated by `RootQueryCacheKeyTemplate` from field name and arguments: +```json +{"__typename":"Query","field":"topProducts"} +{"__typename":"Query","field":"user","args":{"id":"123"}} +{"__typename":"Query","field":"search","args":{"max":10,"term":"C3PO"}} +``` + +Arguments are sorted alphabetically for stable key generation. + +### Key Transformations (applied in order) + +1. **Subgraph header hash prefix** (when `IncludeSubgraphHeaderPrefix = true`): + ``` + {headerHash}:{"__typename":"User","key":{"id":"123"}} + ``` + +2. **L2CacheKeyInterceptor** (when set): + ``` + tenant-X:{headerHash}:{"__typename":"User","key":{"id":"123"}} + ``` + +### Entity Field Argument-Aware Keys + +When entity fields have arguments (e.g., `greeting(style: "formal")`), the field argument values are hashed via xxhash and appended as a suffix to the cache key. Different argument values produce different cache entries. + +### EntityKeyMappings (Cache Sharing) + +When `EntityKeyMappings` is configured on a root field, the L2 cache key uses entity key format instead of root field format. This means: +- `Query.user(id: "123")` → cache key `{"__typename":"User","key":{"id":"123"}}` +- A subsequent `_entities` fetch for `User(id: "123")` hits the same cache entry + +## 6. Cache Behavior by Operation Type + +### Queries + +``` +L1 check (main thread, entity fetches only) + ↓ miss +L2 check (goroutine, entity + root fetches) + ↓ miss +Subgraph fetch (goroutine) + ↓ response +Populate L1 + L2 (main thread for L1, goroutine for L2) +``` + +L1 is checked first on the main thread. If it's a complete hit, the goroutine is not spawned (saves overhead). L2 and fetch happen in parallel goroutines. 
+### Mutations + +- **Always skip L2 reads** — fetch fresh data from subgraph +- **Skip L2 writes by default** — unless `EnableEntityL2CachePopulation: true` on the mutation field +- **Optional invalidation** — with `MutationCacheInvalidationConfiguration`, delete L2 entry after mutation +- **Mutation impact detection** — when analytics are enabled, the mutation response is compared against the cached value + +### Subscriptions + +Based on `SubscriptionEntityPopulationConfiguration`: +- **Populate mode** (default): on each subscription event, write entity data to L2 +- **Invalidate mode** (`EnableInvalidationOnKeyOnly: true`): on each event with only `@key` fields, delete L2 entry + +## 7. Cache Invalidation + +### Mutation-Triggered Invalidation + +Configure via `MutationCacheInvalidationConfiguration`. After a mutation completes and returns an entity, the L2 cache entry for that entity is deleted. + +### Subgraph Response Extension Invalidation + +Subgraphs can signal cache invalidation through GraphQL response extensions: + +```json +{ + "data": { "updateUser": { "id": "1", "name": "Updated" } }, + "extensions": { + "cacheInvalidation": { + "keys": [ + { "typename": "User", "key": { "id": "1" } }, + { "typename": "User", "key": { "id": "2" } } + ] + } + } +} +``` + +The engine automatically: +1. Parses `extensions.cacheInvalidation.keys` from each subgraph response +2. Builds L2 cache keys matching entity type and key fields +3. Applies subgraph header prefix and `L2CacheKeyInterceptor` transformations +4. Calls `LoaderCache.Delete()` for each key +5. 
**Optimization**: skips delete if the same key is being written in the same fetch (no unnecessary round-trip) + +**Requirements for extension-based invalidation**: +- `EntityCacheConfigs` must be set on `ResolverOptions` (maps subgraph name → entity type → cache config) +- `EnableL2Cache` must be true on the request context + +### Subscription-Based Invalidation + +With `EnableInvalidationOnKeyOnly: true`, subscription events that only contain `@key` fields trigger L2 deletion. + +### Manual Invalidation + +Call `LoaderCache.Delete()` directly with cache keys. The key format is: +``` +[optional-interceptor-prefix:][optional-header-hash:]{"__typename":"TypeName","key":{...}} +``` + +## 8. Partial Cache Loading + +Controls what happens when some entities in a batch are cached and others are not. + +**Default (`EnablePartialCacheLoad: false`)**: +Any cache miss in a batch → refetch ALL entities from the subgraph. This keeps the cache maximally fresh because every entity gets a fresh value on each batch miss. + +**Enabled (`EnablePartialCacheLoad: true`)**: +Only missing entities are fetched from the subgraph. Cached entities are served directly within their TTL window. This reduces subgraph load but cached entities may be slightly stale (within TTL). + +Choose based on your freshness vs. performance tradeoff. + +## 9. Shadow Mode + +Shadow mode lets you test caching in production without serving cached data to clients. + +**Behavior**: +- L2 cache reads and writes happen normally +- Cached data is **never served** — fresh data is always fetched from the subgraph +- Fresh and cached data are compared for staleness detection +- L1 cache works normally (not affected by shadow mode) + +**Configuration**: Set `ShadowMode: true` on `EntityCacheConfiguration` or `RootFieldCacheConfiguration`. 
+ +**Staleness results** are available in `CacheAnalyticsSnapshot.ShadowComparisons`: +```go +type ShadowComparisonEvent struct { + CacheKey string // Cache key for correlation + EntityType string // Entity type name + IsFresh bool // true if cached data matches fresh data + CachedHash uint64 // xxhash of cached ProvidesData fields + FreshHash uint64 // xxhash of fresh ProvidesData fields + CachedBytes int // Size of cached ProvidesData + FreshBytes int // Size of fresh ProvidesData + DataSource string // Subgraph name + CacheAgeMs int64 // Age of cached entry (ms, 0 = unknown) + ConfiguredTTL time.Duration // TTL configured for this entity +} +``` + +## 10. Cache Analytics + +Enable via `EnableCacheAnalytics: true` in `CachingOptions`. After execution, collect stats: + +```go +snapshot := ctx.GetCacheStats() +``` + +### CacheAnalyticsSnapshot + +```go +type CacheAnalyticsSnapshot struct { + L1Reads []CacheKeyEvent // L1 read events (hit/miss) + L2Reads []CacheKeyEvent // L2 read events (hit/miss/partial-hit) + L1Writes []CacheWriteEvent // L1 write events + L2Writes []CacheWriteEvent // L2 write events + FetchTimings []FetchTimingEvent // Per-fetch timing with HTTP status + ErrorEvents []SubgraphErrorEvent // Subgraph errors + FieldHashes []EntityFieldHash // Field value hashes for staleness + EntityTypes []EntityTypeInfo // Entity counts by type + ShadowComparisons []ShadowComparisonEvent // Shadow mode results + MutationEvents []MutationEvent // Mutation impact on cache +} +``` + +### Convenience Methods + +```go +snapshot.L1HitRate() // float64 [0, 1] +snapshot.L2HitRate() // float64 [0, 1] +snapshot.CachedBytesServed() // int64 +snapshot.EventsByEntityType() // map[string]EntityTypeCacheStats +``` + +### Key Event Types + +**CacheKeyEvent** — per-key cache lookup: +```go +type CacheKeyEvent struct { + CacheKey string // Cache key + EntityType string // Entity type name + Kind CacheKeyEventKind // CacheKeyHit, CacheKeyMiss, CacheKeyPartialHit + DataSource 
string // Subgraph name + ByteSize int // Cached entry size + CacheAgeMs int64 // Age in ms (L2 only, 0 = unknown) + Shadow bool // Shadow mode event +} +``` + +**FetchTimingEvent** — per-fetch timing: +```go +type FetchTimingEvent struct { + DataSource string // Subgraph name + EntityType string // Entity type (empty for root fields) + DurationMs int64 // Fetch/lookup duration + Source FieldSource // FieldSourceSubgraph, FieldSourceL1, FieldSourceL2 + ItemCount int // Number of entities + IsEntityFetch bool // true for _entities queries + HTTPStatusCode int // HTTP status (0 for cache hits) + ResponseBytes int // Response body size (0 for cache hits) + TTFBMs int64 // Time to first byte +} +``` + +**MutationEvent** — mutation impact on cached entities: +```go +type MutationEvent struct { + MutationRootField string // e.g., "updateUser" + EntityType string // e.g., "User" + EntityCacheKey string // Display key JSON + HadCachedValue bool // true if L2 had an entry + IsStale bool // true if cached differs from mutation response + CachedHash uint64 // Hash of cached ProvidesData + FreshHash uint64 // Hash of mutation response ProvidesData + CachedBytes int // 0 when HadCachedValue=false + FreshBytes int +} +``` + +### Integration Pattern + +```go +// After each request: +snapshot := ctx.GetCacheStats() + +// Export to observability +metrics.RecordL1HitRate(snapshot.L1HitRate()) +metrics.RecordL2HitRate(snapshot.L2HitRate()) +metrics.RecordCachedBytesServed(snapshot.CachedBytesServed()) + +for _, timing := range snapshot.FetchTimings { + metrics.RecordFetchDuration(timing.DataSource, timing.DurationMs, timing.Source) +} + +for _, shadow := range snapshot.ShadowComparisons { + if !shadow.IsFresh { + log.Warn("stale cache entry", "entity", shadow.EntityType, "key", shadow.CacheKey, "age_ms", shadow.CacheAgeMs) + } +} + +for _, mutation := range snapshot.MutationEvents { + if mutation.IsStale { + log.Info("mutation updated stale cache", "field", 
mutation.MutationRootField, "entity", mutation.EntityType) + } +} +``` + +## 11. Complete Integration Example + +```go +package main + +import ( + "context" + "time" + + "github.com/wundergraph/graphql-go-tools/execution/engine" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/plan" + "github.com/wundergraph/graphql-go-tools/v2/pkg/engine/resolve" +) + +func setupCaching() { + // 1. Define subgraph caching configurations + cachingConfigs := engine.SubgraphCachingConfigs{ + { + SubgraphName: "accounts", + EntityCaching: plan.EntityCacheConfigurations{ + { + TypeName: "User", + CacheName: "default", + TTL: 5 * time.Minute, + IncludeSubgraphHeaderPrefix: true, + }, + }, + RootFieldCaching: plan.RootFieldCacheConfigurations{ + { + TypeName: "Query", + FieldName: "me", + CacheName: "default", + TTL: 1 * time.Minute, + IncludeSubgraphHeaderPrefix: true, + }, + }, + MutationFieldCaching: plan.MutationFieldCacheConfigurations{ + { + FieldName: "updateUser", + EnableEntityL2CachePopulation: true, + }, + }, + MutationCacheInvalidation: plan.MutationCacheInvalidationConfigurations{ + { + FieldName: "deleteUser", + EntityTypeName: "User", + }, + }, + }, + { + SubgraphName: "products", + EntityCaching: plan.EntityCacheConfigurations{ + { + TypeName: "Product", + CacheName: "default", + TTL: 10 * time.Minute, + }, + }, + RootFieldCaching: plan.RootFieldCacheConfigurations{ + { + TypeName: "Query", + FieldName: "topProducts", + CacheName: "default", + TTL: 30 * time.Second, + }, + }, + SubscriptionEntityPopulation: plan.SubscriptionEntityPopulationConfigurations{ + { + TypeName: "Product", + CacheName: "default", + TTL: 10 * time.Minute, + EnableInvalidationOnKeyOnly: true, + }, + }, + }, + } + + // 2. 
Create engine configuration + factory := engine.NewFederationEngineConfigFactory( + context.Background(), + subgraphConfigs, // []engine.SubgraphConfiguration + engine.WithSubgraphEntityCachingConfigs(cachingConfigs), + ) + config, _ := factory.BuildEngineConfiguration() + + // 3. Create resolver with cache instances + resolver := resolve.New(context.Background(), resolve.ResolverOptions{ + MaxConcurrency: 64, + Caches: map[string]resolve.LoaderCache{ + "default": NewRedisCache("redis://localhost:6379"), + }, + EntityCacheConfigs: map[string]map[string]*resolve.EntityCacheInvalidationConfig{ + "accounts": { + "User": {CacheName: "default", IncludeSubgraphHeaderPrefix: true}, + }, + "products": { + "Product": {CacheName: "default"}, + }, + }, + }) + + // 4. Per-request: enable caching + execCtx := resolve.NewContext(context.Background()) + execCtx.ExecutionOptions.Caching = resolve.CachingOptions{ + EnableL1Cache: true, + EnableL2Cache: true, + EnableCacheAnalytics: true, + L2CacheKeyInterceptor: func(ctx context.Context, key string, info resolve.L2CacheKeyInterceptorInfo) string { + // Optional: add tenant isolation + if tenantID, ok := ctx.Value("tenant-id").(string); ok { + return tenantID + ":" + key + } + return key + }, + } + + // 5. Resolve (response is the plan built with the config from step 2; response, initialData, and writer are supplied by your request handling) + resolveInfo, _ := resolver.ResolveGraphQLResponse(execCtx, response, initialData, writer) + + // 6. Collect cache analytics + snapshot := execCtx.GetCacheStats() + _ = snapshot.L1HitRate() + _ = snapshot.L2HitRate() + _ = snapshot.CachedBytesServed() + _ = config + _ = resolveInfo +} +``` + +## 12. Configuration Reference Summary + +| Configuration | Package | Purpose | +|--------------|---------|---------| +| `SubgraphCachingConfig` | `execution/engine` | Top-level per-subgraph config container | +| `EntityCacheConfiguration` | `v2/pkg/engine/plan` | L2 entity caching (TypeName, TTL, etc.)
| +| `RootFieldCacheConfiguration` | `v2/pkg/engine/plan` | L2 root field caching (FieldName, EntityKeyMappings) | +| `MutationFieldCacheConfiguration` | `v2/pkg/engine/plan` | Mutation L2 write control | +| `MutationCacheInvalidationConfiguration` | `v2/pkg/engine/plan` | Mutation-triggered L2 deletion | +| `SubscriptionEntityPopulationConfiguration` | `v2/pkg/engine/plan` | Subscription L2 populate/invalidate | +| `CachingOptions` | `v2/pkg/engine/resolve` | Per-request L1/L2/analytics enable | +| `L2CacheKeyInterceptor` | `v2/pkg/engine/resolve` | Custom key transform (tenant isolation) | +| `LoaderCache` | `v2/pkg/engine/resolve` | Cache backend interface | +| `EntityCacheInvalidationConfig` | `v2/pkg/engine/resolve` | Extension-based invalidation lookup | +| `ResolverOptions.Caches` | `v2/pkg/engine/resolve` | Named cache instance registry | diff --git a/execution/engine/CLAUDE.md b/execution/engine/CLAUDE.md deleted file mode 100644 index 2ea12f2432..0000000000 --- a/execution/engine/CLAUDE.md +++ /dev/null @@ -1,25 +0,0 @@ -# Caching Test Rules - -## Always check every cache log - -Every `defaultCache.ClearLog()` MUST be followed by `defaultCache.GetLog()` with full assertions BEFORE the next `ClearLog()` or end of test. Never clear a log without verifying its contents — skipped checks hide regressions. - -```go -// CORRECT: every ClearLog has a corresponding GetLog + assertion -defaultCache.ClearLog() -resp := gqlClient.Query(...) -assert.Equal(t, expectedResp, string(resp)) - -logAfterFirst := defaultCache.GetLog() -wantLog := []CacheLogEntry{ - {Operation: "get", Keys: []string{`...`}, Hits: []bool{false}}, - {Operation: "set", Keys: []string{`...`}}, -} -assert.Equal(t, sortCacheLogKeys(wantLog), sortCacheLogKeys(logAfterFirst), "descriptive message") - -// WRONG: ClearLog without checking — hides bugs -defaultCache.ClearLog() -resp := gqlClient.Query(...) -assert.Equal(t, expectedResp, string(resp)) -defaultCache.ClearLog() // previous log lost! 
-``` \ No newline at end of file diff --git a/v2/pkg/engine/resolve/CLAUDE.md b/v2/pkg/engine/resolve/CLAUDE.md new file mode 100644 index 0000000000..0f655b7058 --- /dev/null +++ b/v2/pkg/engine/resolve/CLAUDE.md @@ -0,0 +1,589 @@ +# Resolve Package Reference + +The `resolve` package is the execution core of the GraphQL engine. It takes a planned `GraphQLResponse` (response plan tree + fetch tree), executes subgraph fetches, and renders the final JSON response. Entity caching (L1/L2) is integrated directly into the fetch execution flow. + +## Architecture Overview + +Three components work together: + +| Component | File | Responsibility | +|-----------|------|---------------| +| **Resolver** | `resolve.go` | Orchestration, concurrency, arena pools, subscriptions | +| **Loader** | `loader.go` | Fetch execution, caching, result merging | +| **Resolvable** | `resolvable.go` | Response data, two-pass rendering, error handling | + +**End-to-end flow:** +``` +Resolver.ResolveGraphQLResponse(ctx, response, data, writer) + 1. Acquire concurrency semaphore + 2. Create Loader + Resolvable from arena pool + 3. Resolvable.Init(ctx, data, operationType) + 4. Loader.LoadGraphQLResponseData(ctx, response, resolvable) + └─ Walk fetch tree: sequence/parallel/single + └─ For each fetch: cache check → subgraph request → merge result + 5. Resolvable.Resolve(ctx, response.Data, response.Fetches, writer) + └─ Two-pass walk: validate+collect errors, then render JSON +``` + +## Resolver (resolve.go) + +Resolver is a single-threaded event loop for subscriptions and an orchestrator for query/mutation resolution. 
+ +### Key Fields +```go +type Resolver struct { + ctx context.Context + options ResolverOptions + maxConcurrency chan struct{} // Semaphore (buffered channel, default 32) + resolveArenaPool *arena.Pool // Arena for Loader & Resolvable + responseBufferPool *arena.Pool // Arena for response buffering + subgraphRequestSingleFlight *SubgraphRequestSingleFlight + inboundRequestSingleFlight *InboundRequestSingleFlight + triggers map[uint64]*trigger // Subscription triggers + events chan subscriptionEvent // Subscription event loop +} +``` + +### Entry Points + +**ResolveGraphQLResponse** — standard resolution: +```go +func (r *Resolver) ResolveGraphQLResponse(ctx *Context, response *GraphQLResponse, data []byte, writer io.Writer) (*GraphQLResolveInfo, error) +``` + +**ArenaResolveGraphQLResponse** — optimized with inbound request deduplication: +```go +func (r *Resolver) ArenaResolveGraphQLResponse(ctx *Context, response *GraphQLResponse, writer io.Writer) (*GraphQLResolveInfo, error) +``` +Uses two separate arenas (resolve + response buffer). The resolve arena is freed early before I/O. Inbound deduplication: leader executes, followers wait and reuse buffered response. 
+ +**ResolveGraphQLSubscription** — long-lived subscription: +```go +func (r *Resolver) ResolveGraphQLSubscription(ctx *Context, subscription *GraphQLSubscription, writer SubscriptionResponseWriter) error +``` + +### ResolverOptions + +Key fields on `ResolverOptions`: +- `MaxConcurrency` — semaphore size (default 32, ~50KB per concurrent resolve) +- `Caches map[string]LoaderCache` — named L2 cache instances +- `EntityCacheConfigs` — subgraph → entity type → invalidation config (for extension-based invalidation) +- `PropagateSubgraphErrors`, `SubgraphErrorPropagationMode` — error handling +- `ResolvableOptions` — Apollo compatibility flags +- `SubscriptionHeartbeatInterval` — heartbeat interval (default 5s) + +## Loader (loader.go) + +The Loader executes fetches and merges results into the Resolvable's data. Caching is embedded in the fetch execution flow. + +### Key Fields +```go +type Loader struct { + resolvable *Resolvable + ctx *Context + caches map[string]LoaderCache // Named L2 cache instances + l1Cache *sync.Map // Per-request entity cache (key→*astjson.Value) + jsonArena arena.Arena // NOT thread-safe, main thread only + singleFlight *SubgraphRequestSingleFlight + enableMutationL2CachePopulation bool // Set per-mutation, inherited by entity fetches + entityCacheConfigs map[string]map[string]*EntityCacheInvalidationConfig +} +``` + +### Fetch Tree Execution + +`LoadGraphQLResponseData` is the entry point. 
It dispatches on the fetch tree (simplified sketch): + +```go +func (l *Loader) resolveFetchNode(node *FetchTreeNode) error { + switch node.Kind { + case FetchTreeNodeKindSingle: return l.resolveSingle(node.Item) + case FetchTreeNodeKindSequence: return l.resolveSerial(node.ChildNodes) + case FetchTreeNodeKindParallel: return l.resolveParallel(node.ChildNodes) + } +} +``` + +### Sequential Execution (resolveSerial) + +Each fetch waits for the previous one to complete (simplified; handling of `err` is omitted): +```go +for i := range nodes { + err := l.resolveFetchNode(nodes[i]) +} +``` + +### Parallel Execution (resolveParallel) — 4-Phase Model + +This is the most sophisticated part of the Loader. It handles L1/L2 caching with thread-safe analytics: + +**Phase 1: Prepare + L1 Check (Main Thread)** +- `prepareCacheKeys()` — generate L1 and L2 cache keys for each fetch +- `tryL1CacheLoad()` — check sync.Map for entity hits +- If L1 complete hit → set `cacheSkipFetch = true`, skip goroutine + +**Phase 2: L2 + Fetch (Goroutines via errgroup)** +- `loadFetchL2Only()` for fetches not cached in L1 +- Checks L2 cache (thread-safe), fetches from subgraph if needed +- Accumulates analytics in per-result slices (goroutine-safe) + +**Phase 3: Merge Analytics (Main Thread)** +- Merge L2 analytics events from per-result slices into collector +- Merge entity sources, fetch timings, error events + +**Phase 4: Merge Results (Main Thread)** +- `mergeResult()` — parse response JSON, merge into Resolvable data +- `callOnFinished()` — invoke LoaderHooks +- Populate L1 and L2 caches + +**Why this design?** L1 is cheap (in-memory sync.Map) — check on main thread to skip goroutine work early. L2/fetch are expensive — run in parallel goroutines. + +### Result Merging + +After a fetch completes, `mergeResult` does: +1. Check for errors in subgraph response +2. Handle auth/rate-limit rejections +3. Parse response JSON into arena-allocated values +4. Merge into items using `astjson.MergeValuesWithPath` +5.
For batch entities: map response items back to original items via `batchStats` +6. Run cache invalidation (mutations, extensions) +7. Populate L1 and L2 caches + +### LoaderHooks + +```go +type LoaderHooks interface { + OnLoad(ctx context.Context, ds DataSourceInfo) context.Context + OnFinished(ctx context.Context, ds DataSourceInfo, info *ResponseInfo) +} +``` +Called before/after each fetch. `OnLoad` returns a context passed to `OnFinished`. Not called when fetch is skipped (null parent, auth rejection). + +### DataSource Interface + +```go +type DataSource interface { + Load(ctx context.Context, headers http.Header, input []byte) (data []byte, err error) + LoadWithFiles(ctx context.Context, headers http.Header, input []byte, files []*httpclient.FileUpload) (data []byte, err error) +} +``` + +## Resolvable (resolvable.go) + +Holds the response data and renders it to JSON using a two-pass tree walk. + +### Key Fields +```go +type Resolvable struct { + data *astjson.Value // Root response object (arena-allocated) + errors *astjson.Value // Errors array (lazily initialized) + astjsonArena arena.Arena // Shared with Loader, NOT thread-safe + print bool // false=pre-walk, true=print-walk + out io.Writer // Output for print pass + path []fastjsonext.PathElement // Current JSON path + depth int + operationType ast.OperationType + + // Entity cache analytics (set during print phase) + currentEntityAnalytics *ObjectCacheAnalytics + currentEntityTypeName string + currentEntitySource FieldSource +} +``` + +### Two-Pass Walk + +**Pass 1 (pre-walk)**: `print = false` +- Traverse response plan tree, validate types +- Check field authorization +- Collect errors (null bubbling for non-nullable fields) +- Do NOT write output + +**Pass 2 (print-walk)**: `print = true` +- Traverse again, write JSON to output +- Record entity cache analytics during rendering +- Hash field values for staleness detection + +### walkObject (core method) + +``` +1. 
Navigate to object in JSON: value = parent.Get(obj.Path...) +2. Null check: if nil and non-nullable → error with null bubbling +3. Type validation: check __typename against PossibleTypes +4. Entity analytics: extract key fields, record entity source (print phase only) +5. Walk all fields recursively: walkNode(field.Value, value) +6. Field authorization: skip unauthorized fields +``` + +### Error Handling Modes + +- **ErrorBehaviorPropagate** (default): null bubbles up to nearest nullable parent +- **ErrorBehaviorNull**: field becomes null even if non-nullable +- **ErrorBehaviorHalt**: stop all execution on first error + +## Response Plan Tree (Node Types) + +The planner produces a tree of Node types describing the expected response shape. + +### GraphQLResponse + +```go +type GraphQLResponse struct { + Data *Object // Response plan tree root + Fetches *FetchTreeNode // Fetch execution tree + Info *GraphQLResponseInfo + DataSources []DataSourceInfo +} +``` + +### Node Types + +| Type | Fields | Purpose | +|------|--------|---------| +| `Object` | Path, Fields, Nullable, PossibleTypes, CacheAnalytics | Object with named fields | +| `Field` | Name, OriginalName, Value (Node), CacheArgs, OnTypeNames, Info | Named field in an object | +| `Array` | Path, Nullable, Item (Node), SkipItem | List of items | +| `String` | Path, Nullable, IsObjectID | String scalar | +| `Scalar` | Path, Nullable | Custom scalar (raw JSON) | +| `Boolean`, `Integer`, `Float`, `BigInt` | Path, Nullable | Typed scalars | +| `Enum` | Path, Nullable, TypeName, Values | Enumeration | +| `Null`, `EmptyObject`, `EmptyArray` | — | Constant nodes | +| `StaticString` | Path, Value | Constant string value | + +### Field +```go +type Field struct { + Name []byte // Output name (may be alias) + OriginalName []byte // Schema name (nil if Name IS original) + Value Node // Nested response node + CacheArgs []CacheFieldArg // Field arguments for cache key suffix (xxhash) + OnTypeNames [][]byte // Fragment type 
conditions + Info *FieldInfo // Metadata (type names, authorization, source tracking) +} +``` + +## Fetch Tree + +The planner produces a separate tree for fetch execution. + +### FetchTreeNode +```go +type FetchTreeNode struct { + Kind FetchTreeNodeKind // Single | Sequence | Parallel + Item *FetchItem // For Single nodes + ChildNodes []*FetchTreeNode // For Sequence/Parallel nodes + Trigger *FetchTreeNode // For subscription triggers +} +``` + +### Fetch Types + +| Type | Use Case | Key Fields | +|------|----------|------------| +| `SingleFetch` | Root fields, standalone queries | InputTemplate, DataSource, Caching | +| `EntityFetch` | Nested entity (single object) | EntityInput (Header, Item, Footer) | +| `BatchEntityFetch` | Nested entity (array) | BatchInput (Header, Items[], Separator, Footer) | + +All fetch types carry `FetchCacheConfiguration` and `FetchInfo` (data source name, provides data, root fields). + +### FetchCacheConfiguration +```go +type FetchCacheConfiguration struct { + Enabled bool // L2 enabled for this fetch + CacheName string // Cache instance name + TTL time.Duration // Cache entry lifetime + CacheKeyTemplate CacheKeyTemplate // Key generation template + IncludeSubgraphHeaderPrefix bool // Prefix with header hash + RootFieldL1EntityCacheKeyTemplates map[string]CacheKeyTemplate // Entity L1 keys for root fields + EnablePartialCacheLoad bool // Fetch only missing entities + UseL1Cache bool // L1 enabled (set by postprocessor) + ShadowMode bool // Never serve cached data + MutationEntityImpactConfig *MutationEntityImpactConfig + EnableMutationL2CachePopulation bool // Mutations populate L2 + HashAnalyticsKeys bool // Hash vs raw in analytics + KeyFields []KeyField // @key fields for analytics +} +``` + +## Entity Caching + +### Architecture + +| Cache | Storage | Scope | Key Fields | Thread Safety | +|-------|---------|-------|------------|---------------| +| **L1** | `sync.Map` in Loader | Single request | `@key` only | sync.Map | +| **L2** 
| External (`LoaderCache`) | Cross-request | `@key` only | Backend is called from Phase 2 goroutines (must be concurrency-safe); analytics via per-result accumulation | + +**Key principle**: Both L1 and L2 use only `@key` fields for stable entity identity. + +### LoaderCache Interface +```go +type LoaderCache interface { + Get(ctx context.Context, keys []string) ([]*CacheEntry, error) + Set(ctx context.Context, entries []*CacheEntry, ttl time.Duration) error + Delete(ctx context.Context, keys []string) error +} + +type CacheEntry struct { + Key string + Value []byte // JSON-encoded entity + RemainingTTL time.Duration // TTL from cache (0 = unknown) +} +``` + +### Cache Key Generation + +**Entity keys** (via `EntityQueryCacheKeyTemplate`): +```json +{"__typename":"User","key":{"id":"123"}} +``` + +**Root field keys** (via `RootQueryCacheKeyTemplate`): +```json +{"__typename":"Query","field":"topProducts","args":{"first":5}} +``` + +**Key transformations** (applied in order): +1. Subgraph header hash prefix: `{headerHash}:{key}` (when `IncludeSubgraphHeaderPrefix = true`) +2. `L2CacheKeyInterceptor`: custom transform (e.g., tenant isolation) + +**Entity field argument-aware keys**: Fields with arguments get an xxhash suffix appended, so different argument values produce different cache entries. + +### Cache Flow (Integrated into Loader Phases) + +**Sequential (tryCacheLoad):** +``` +prepareCacheKeys() → tryL1CacheLoad() → tryL2CacheLoad() → fetch → populateL1Cache() + updateL2Cache() +``` + +**Parallel (resolveParallel):** +``` +Phase 1 (main): prepareCacheKeys + tryL1CacheLoad for all fetches +Phase 2 (goroutines): tryL2CacheLoad + fetch via loadFetchL2Only +Phase 3 (main): merge analytics from goroutines +Phase 4 (main): mergeResult + populateL1Cache + updateL2Cache +``` + +### Self-Referential Entity Fix + +**Problem**: When `User.friends` returns `User` entities, L1 cache returns pointers to the same object → aliasing on merge → stack overflow.
+ +**Solution**: `shallowCopyProvidedFields()` in `loader_json_copy.go` creates copies based on `ProvidesData` schema. Only fields required by the fetch are copied (shallow, not deep). + +### ProvidesData and Validation + +`FetchInfo.ProvidesData` describes what fields a fetch provides. Used by: +- `validateItemHasRequiredData()` — check if cached entity has all required fields +- `shallowCopyProvidedFields()` — copy only required fields for self-referential entities + +**Critical**: For nested entity fetches, `ProvidesData` must contain entity fields (`id`, `username`), NOT the parent field (`author`). + +### Cache Invalidation + +**Extension-based** (`processExtensionsCacheInvalidation`): +Subgraphs return invalidation keys in response extensions: +```json +{"extensions":{"cacheInvalidation":{"keys":[{"typename":"User","key":{"id":"1"}}]}}} +``` +Optimization: skips delete if the same key is being written by `updateL2Cache`. + +**Mutation-based** (`MutationCacheInvalidationConfiguration`): +After mutation completes, delete L2 entry for the returned entity. + +**Subscription-based** (`SubscriptionEntityPopulationConfiguration`): +- Populate mode: write entity data to L2 on each subscription event +- Invalidate mode (`EnableInvalidationOnKeyOnly`): delete L2 entry when subscription provides only @key fields + +### Partial Cache Loading + +- **Default** (`EnablePartialCacheLoad = false`): any cache miss → refetch ALL entities in batch +- **Enabled** (`EnablePartialCacheLoad = true`): only fetch missing entities, serve cached ones directly + +### Shadow Mode + +L2 reads and writes happen normally, but cached data is **never served**. Fresh data is always fetched from the subgraph and compared against the cached value. Used for staleness detection via `ShadowComparisonEvent`. L1 cache works normally (not affected by shadow mode). + +### Cache Analytics + +Enable via `ctx.ExecutionOptions.Caching.EnableCacheAnalytics = true`. 
After execution, call `ctx.GetCacheStats()` to get `CacheAnalyticsSnapshot`. + +**CacheAnalyticsSnapshot** contains: +- `L1Reads`, `L2Reads` — `[]CacheKeyEvent` (hit/miss/partial-hit per key) +- `L1Writes`, `L2Writes` — `[]CacheWriteEvent` (key, size, TTL) +- `FetchTimings` — `[]FetchTimingEvent` (duration, HTTP status, response size, TTFB) +- `ErrorEvents` — `[]SubgraphErrorEvent` +- `FieldHashes` — `[]EntityFieldHash` (xxhash of field values for staleness) +- `EntityTypes` — `[]EntityTypeInfo` (count and unique keys per type) +- `ShadowComparisons` — `[]ShadowComparisonEvent` (cached vs fresh comparison) +- `MutationEvents` — `[]MutationEvent` (mutation impact on cached entities) + +**Convenience methods**: `L1HitRate()`, `L2HitRate()`, `CachedBytesServed()`, `EventsByEntityType()`. + +**Thread safety**: Analytics are accumulated per-result in goroutines (`l2AnalyticsEvents`, `l2FetchTimings`, `l2ErrorEvents`), then merged on the main thread via `MergeL2Events()`, `MergeL2FetchTimings()`, `MergeL2Errors()`. + +## Configuration Types + +### Runtime Options (set per-request on Context) +```go +type CachingOptions struct { + EnableL1Cache bool // Per-request entity cache + EnableL2Cache bool // External cross-request cache + EnableCacheAnalytics bool // Detailed event tracking + L2CacheKeyInterceptor L2CacheKeyInterceptor // Custom key transform +} + +type L2CacheKeyInterceptor func(ctx context.Context, key string, info L2CacheKeyInterceptorInfo) string +type L2CacheKeyInterceptorInfo struct { + SubgraphName string + CacheName string +} +``` + +### Plan-Time Configuration (in `plan/federation_metadata.go`) + +Set per-subgraph via `SubgraphCachingConfig`: + +| Type | Controls | +|------|----------| +| `EntityCacheConfiguration` | L2 caching for entity types (TypeName, CacheName, TTL, etc.) 
| +| `RootFieldCacheConfiguration` | L2 caching for root fields (TypeName, FieldName, EntityKeyMappings) | +| `MutationFieldCacheConfiguration` | Whether mutations populate L2 | +| `MutationCacheInvalidationConfiguration` | Which mutations delete L2 entries | +| `SubscriptionEntityPopulationConfiguration` | How subscriptions populate/invalidate L2 | + +## Thread Safety Model + +| Context | Operations | Safety Mechanism | +|---------|-----------|-----------------| +| Main thread | Arena allocation, L1 cache ops, result merging, two-pass rendering | Single-threaded | +| Goroutines (Phase 2) | L2 cache Get/Set/Delete, subgraph HTTP calls | Per-result accumulation slices | +| Analytics merge | Goroutine events → collector | Main thread merge after g.Wait() | +| L1 cache | Read/write entity values | sync.Map | + +**Rule**: Never allocate on `jsonArena` from a goroutine. All arena-allocated JSON is created on the main thread. + +## Arena Allocation + +- Resolver owns `resolveArenaPool` and `responseBufferPool` +- All `*astjson.Value` nodes live on the shared arena (no GC pressure) +- Arena is NOT thread-safe → only main thread allocates +- **Early release pattern** (ArenaResolveGraphQLResponse): resolve arena freed before I/O, response arena freed after write +- Never store heap-allocated `*Value` in arena-owned containers (GC can't trace into arena noscan memory) + +## Key Files + +| File | Purpose | +|------|---------| +| `resolve.go` | Resolver: orchestration, concurrency, subscriptions | +| `loader.go` | Loader: fetch execution, parallel phases, result merging | +| `resolvable.go` | Resolvable: two-pass walk, JSON rendering | +| `loader_cache.go` | L1/L2 cache operations, LoaderCache interface, prepareCacheKeys, tryL1/L2CacheLoad, populateL1Cache, updateL2Cache | +| `loader_json_copy.go` | shallowCopyProvidedFields for self-referential entities | +| `caching.go` | CacheKeyTemplate, EntityQueryCacheKeyTemplate, RootQueryCacheKeyTemplate | +| `cache_analytics.go` | 
CacheAnalyticsCollector, CacheAnalyticsSnapshot, all event types | +| `extensions_cache_invalidation.go` | processExtensionsCacheInvalidation | +| `fetch.go` | Fetch types (SingleFetch, EntityFetch, BatchEntityFetch), FetchCacheConfiguration | +| `fetchtree.go` | FetchTreeNode tree structure | +| `node_object.go` | Object, Field node types | +| `node_array.go` | Array node type | +| `node.go` | Node interface, NodeKind constants | +| `context.go` | Context, CachingOptions, ExecutionOptions | +| `datasource.go` | DataSource, SubscriptionDataSource interfaces | +| `response.go` | GraphQLResponse, GraphQLResponseInfo | + +## Testing Patterns + +### Unit Test Setup +```go +ctrl := gomock.NewController(t) +defer ctrl.Finish() + +ds := NewMockDataSource(ctrl) +ds.EXPECT().Load(gomock.Any(), gomock.Any(), gomock.Any()). + DoAndReturn(func(ctx context.Context, headers http.Header, input []byte) ([]byte, error) { + return []byte(`{"data":{...}}`), nil + }).Times(1) + +loader := &Loader{caches: map[string]LoaderCache{"default": cache}} + +// REQUIRED: Disable singleFlight for unit tests +ctx := NewContext(context.Background()) +ctx.ExecutionOptions.DisableSubgraphRequestDeduplication = true +ctx.ExecutionOptions.Caching = CachingOptions{EnableL1Cache: true, EnableL2Cache: true} + +// REQUIRED: Always use arena +ar := arena.NewMonotonicArena(arena.WithMinBufferSize(1024)) +resolvable := NewResolvable(ar, ResolvableOptions{}) +resolvable.Init(ctx, nil, ast.OperationTypeQuery) + +err := loader.LoadGraphQLResponseData(ctx, response, resolvable) +out := fastjsonext.PrintGraphQLResponse(resolvable.data, resolvable.errors) +``` + +### Exact Assertions + +**IMPORTANT**: Always use exact assertions. Never use vague comparisons. 
+ +```go +// GOOD: exact values +assert.Equal(t, 3, hitCount, "should have exactly 3 L1 hits") +assert.Equal(t, int64(12), stats.L1Hits, "should have exactly 12 L1 hits") +assert.Equal(t, 2, accountsCalls, "should call accounts subgraph exactly twice") + +// BAD: hides regressions +assert.GreaterOrEqual(t, hitCount, 1) // DON'T DO THIS +assert.Greater(t, stats.L1Hits, int64(0)) // DON'T DO THIS +``` + +### Snapshot Comments + +Every event line in a `CacheAnalyticsSnapshot` assertion MUST have a brief comment explaining **why** that event occurred: + +```go +// GOOD: explains the "why" +L2Reads: []resolve.CacheKeyEvent{ + {CacheKey: keyUser, Kind: resolve.CacheKeyMiss, ...}, // First request, L2 empty + {CacheKey: keyUser, Kind: resolve.CacheKeyHit, ...}, // Populated by Request 1 +}, + +// BAD: restates the field value +{CacheKey: keyUser, Kind: resolve.CacheKeyMiss, ...}, // this is a miss +``` + +### Cache Log Rule + +Every `defaultCache.ClearLog()` MUST be followed by `defaultCache.GetLog()` with full assertions BEFORE the next `ClearLog()` or end of test. Never clear a log without verifying its contents. + +### Run Tests +```bash +go test -run "TestL1Cache" ./v2/pkg/engine/resolve/... -v +go test -run "TestFederationCaching" ./execution/engine/... -v +go test -race ./v2/pkg/engine/resolve/... -v +``` + +## astjson Quick Reference + +```go +// Create values on arena +astjson.ObjectValue(arena) +astjson.ArrayValue(arena) +astjson.StringValue(arena, string) +astjson.StringValueBytes(arena, []byte) +astjson.NumberValue(arena, string) +astjson.TrueValue(arena) +astjson.FalseValue(arena) +astjson.NullValue // Global constant (not a function) + +// Navigate +value.Get(keys...) // Navigate nested path +value.GetArray() // Get array items +value.GetStringBytes() // Get string as []byte +value.Type() // TypeNull, TypeTrue, TypeObject, etc. 
+ +// Mutate +value.Set(arena, key, val) // Set object field +value.SetArrayItem(arena, idx, val) // Set array item + +// Serialize +value.MarshalTo([]byte) // Append JSON to buffer +``` From 70ed7091ddac6560ce7908f2b9d937ed00a707b5 Mon Sep 17 00:00:00 2001 From: Jens Neuse Date: Fri, 6 Mar 2026 09:43:38 +0100 Subject: [PATCH 2/2] =?UTF-8?q?docs:=20address=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20add=20fence=20language=20tags,=20fix=20type=20asser?= =?UTF-8?q?tion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add `text` language tag to all unlabeled fenced code blocks (MD040) - Fix panic-prone ctx.Value type assertion in interceptor example Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 2 +- ENTITY_CACHING_INTEGRATION.md | 14 ++++++++------ v2/pkg/engine/resolve/CLAUDE.md | 8 ++++---- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index f032cd8e10..d40d2f28af 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,7 +6,7 @@ Module: `github.com/wundergraph/graphql-go-tools` (Go 1.25, go.work workspace) ## Data Flow -``` +```text parse → normalize → validate → plan → resolve → response ``` diff --git a/ENTITY_CACHING_INTEGRATION.md b/ENTITY_CACHING_INTEGRATION.md index 94f20e4ed3..6562126ade 100644 --- a/ENTITY_CACHING_INTEGRATION.md +++ b/ENTITY_CACHING_INTEGRATION.md @@ -252,8 +252,10 @@ ctx.ExecutionOptions.Caching = resolve.CachingOptions{ // Optional: transform L2 cache keys (e.g., for tenant isolation) L2CacheKeyInterceptor: func(ctx context.Context, key string, info resolve.L2CacheKeyInterceptorInfo) string { - tenantID := ctx.Value("tenant-id").(string) - return tenantID + ":" + key + if tenantID, ok := ctx.Value("tenant-id").(string); ok { + return tenantID + ":" + key + } + return key }, } ``` @@ -293,12 +295,12 @@ Arguments are sorted alphabetically for stable key generation. ### Key Transformations (applied in order) 1. 
**Subgraph header hash prefix** (when `IncludeSubgraphHeaderPrefix = true`): - ``` + ```text {headerHash}:{"__typename":"User","key":{"id":"123"}} ``` 2. **L2CacheKeyInterceptor** (when set): - ``` + ```text tenant-X:{headerHash}:{"__typename":"User","key":{"id":"123"}} ``` @@ -316,7 +318,7 @@ When `EntityKeyMappings` is configured on a root field, the L2 cache key uses en ### Queries -``` +```text L1 check (main thread, entity fetches only) ↓ miss L2 check (goroutine, entity + root fetches) @@ -383,7 +385,7 @@ With `EnableInvalidationOnKeyOnly: true`, subscription events that only contain ### Manual Invalidation Call `LoaderCache.Delete()` directly with cache keys. The key format is: -``` +```text [optional-interceptor-prefix:][optional-header-hash:]{"__typename":"TypeName","key":{...}} ``` diff --git a/v2/pkg/engine/resolve/CLAUDE.md b/v2/pkg/engine/resolve/CLAUDE.md index 0f655b7058..67e4b156f8 100644 --- a/v2/pkg/engine/resolve/CLAUDE.md +++ b/v2/pkg/engine/resolve/CLAUDE.md @@ -13,7 +13,7 @@ Three components work together: | **Resolvable** | `resolvable.go` | Response data, two-pass rendering, error handling | **End-to-end flow:** -``` +```text Resolver.ResolveGraphQLResponse(ctx, response, data, writer) 1. Acquire concurrency semaphore 2. Create Loader + Resolvable from arena pool @@ -206,7 +206,7 @@ type Resolvable struct { ### walkObject (core method) -``` +```text 1. Navigate to object in JSON: value = parent.Get(obj.Path...) 2. Null check: if nil and non-nullable → error with null bubbling 3. 
Type validation: check __typename against PossibleTypes @@ -352,12 +352,12 @@ type CacheEntry struct { ### Cache Flow (Integrated into Loader Phases) **Sequential (tryCacheLoad):** -``` +```text prepareCacheKeys() → tryL1CacheLoad() → tryL2CacheLoad() → fetch → populateL1Cache() + updateL2Cache() ``` **Parallel (resolveParallel):** -``` +```text Phase 1 (main): prepareCacheKeys + tryL1CacheLoad for all fetches Phase 2 (goroutines): tryL2CacheLoad + fetch via loadFetchL2Only Phase 3 (main): merge analytics from goroutines