From 38c93f76394ff0ebe35c50a8c73cc5f200eb4714 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Wed, 11 Mar 2026 20:33:29 +0100 Subject: [PATCH 01/31] feat: add PQL manifest support for persisted operations --- cdn-server/cdn/src/index.ts | 63 ++- cdn-server/cdn/test/cdn.test.ts | 180 ++++++- cdn-server/src/s3.ts | 12 +- .../deletePersistedOperation.ts | 19 +- .../persisted-operation/generateManifest.ts | 61 +++ .../publishPersistedOperations.ts | 9 + .../bufservices/persisted-operation/utils.ts | 8 + .../core/repositories/OperationsRepository.ts | 30 ++ .../test/persisted-operations.test.ts | 202 +++++++- router-tests/pql_manifest_test.go | 384 ++++++++++++++ .../graph/operations/manifest.json | 10 + router/core/router.go | 39 +- router/internal/persistedoperation/client.go | 19 + .../persistedoperation/pqlmanifest/fetcher.go | 144 ++++++ .../persistedoperation/pqlmanifest/poller.go | 77 +++ .../persistedoperation/pqlmanifest/store.go | 81 +++ router/pkg/config/config.go | 7 + router/pkg/config/config.schema.json | 487 +++++------------- router/pkg/config/config_test.go | 109 ++++ router/pkg/config/fixtures/full.yaml | 8 +- .../pkg/config/testdata/config_defaults.json | 5 + router/pkg/config/testdata/config_full.json | 5 + 22 files changed, 1570 insertions(+), 389 deletions(-) create mode 100644 controlplane/src/core/bufservices/persisted-operation/generateManifest.ts create mode 100644 router-tests/pql_manifest_test.go create mode 100644 router-tests/testenv/testdata/cdn/organization/graph/operations/manifest.json create mode 100644 router/internal/persistedoperation/pqlmanifest/fetcher.go create mode 100644 router/internal/persistedoperation/pqlmanifest/poller.go create mode 100644 router/internal/persistedoperation/pqlmanifest/store.go diff --git a/cdn-server/cdn/src/index.ts b/cdn-server/cdn/src/index.ts index 02c4a4bc8f..828d7cdca0 100644 --- a/cdn-server/cdn/src/index.ts +++ b/cdn-server/cdn/src/index.ts @@ -26,12 +26,12 @@ export interface BlobStorage { 
headObject({ context, key, - schemaVersionId, + version, }: { context: Context; abortSignal?: AbortSignal; key: string; - schemaVersionId: string; + version: string; }): Promise; } @@ -165,7 +165,7 @@ const latestValidRouterConfig = (storage: BlobStorage) => { // starts for the first time, and we need to return a config anyway. if (body?.version) { try { - isModified = await storage.headObject({ context: c, key, schemaVersionId: body.version }); + isModified = await storage.headObject({ context: c, key, version: body.version }); } catch (e: any) { if (e instanceof BlobNotFoundError) { return c.notFound(); @@ -262,6 +262,58 @@ const cacheOperations = (storage: BlobStorage) => { }; }; +const persistedOperationsManifest = (storage: BlobStorage) => { + return async (c: Context) => { + const organizationId = c.get('authenticatedOrganizationId'); + const federatedGraphId = c.get('authenticatedFederatedGraphId'); + + if (organizationId !== c.req.param('organization_id') || federatedGraphId !== c.req.param('federated_graph_id')) { + return c.text('Bad Request', 400); + } + + const key = `${organizationId}/${federatedGraphId}/operations/manifest.json`; + + const body = await c.req.json(); + + let isModified = true; + + // Only check if revision is specified otherwise we assume the router + // starts for the first time, and we need to return the manifest anyway. 
+ if (body?.revision) { + try { + isModified = await storage.headObject({ context: c, key, version: body.revision }); + } catch (e: any) { + if (e instanceof BlobNotFoundError) { + return c.notFound(); + } + throw e; + } + } + + if (!isModified) { + return c.body(null, 304); + } + + let blobObject: BlobObject; + + try { + blobObject = await storage.getObject({ context: c, key, cacheControl: 'no-cache' }); + } catch (e: any) { + if (e instanceof BlobNotFoundError) { + return c.notFound(); + } + throw e; + } + + c.header('Content-Type', 'application/json; charset=UTF-8'); + c.header('Cache-Control', 'no-cache, no-store, must-revalidate'); + + return stream(c, async (stream) => { + await stream.pipe(blobObject.stream); + }); + }; +}; + const subgraphChecks = (storage: BlobStorage) => { return async (c: Context) => { const organizationId = c.get('authenticatedOrganizationId'); @@ -301,6 +353,11 @@ export const cdn = , opts: CdnOptions, ) => { + const manifestPath = '/:organization_id/:federated_graph_id/operations/manifest.json'; + hono + .use(manifestPath, jwtMiddleware(opts.authJwtSecret)) + .post(manifestPath, persistedOperationsManifest(opts.blobStorage)); + const operations = '/:organization_id/:federated_graph_id/operations/:client_id/:operation{.+\\.json$}'; const latestValidRouterConfigs = '/:organization_id/:federated_graph_id/routerconfigs/latest.json'; hono.use(operations, jwtMiddleware(opts.authJwtSecret)).get(operations, persistedOperation(opts.blobStorage)); diff --git a/cdn-server/cdn/test/cdn.test.ts b/cdn-server/cdn/test/cdn.test.ts index 6a936930af..2e804061b8 100644 --- a/cdn-server/cdn/test/cdn.test.ts +++ b/cdn-server/cdn/test/cdn.test.ts @@ -30,12 +30,12 @@ class InMemoryBlobStorage implements BlobStorage { return Promise.resolve({ stream, metadata: obj.metadata }); } - headObject({ key, schemaVersionId }: { key: string; schemaVersionId: string }): Promise { + headObject({ key, version }: { key: string; version: string }): Promise { const obj = 
this.objects.get(key); if (!obj) { return Promise.reject(new BlobNotFoundError(`Object with key ${key} not found`)); } - if (obj.metadata?.version === schemaVersionId) { + if (obj.metadata?.version === version) { return Promise.resolve(false); } return Promise.resolve(true); @@ -554,6 +554,182 @@ describe('CDN handlers', () => { }); }); + describe('Test persisted operations manifest handler', async () => { + const federatedGraphId = 'federatedGraphId'; + const organizationId = 'organizationId'; + const token = await generateToken(organizationId, federatedGraphId, secretKey); + const blobStorage = new InMemoryBlobStorage(); + const requestPath = `/${organizationId}/${federatedGraphId}/operations/manifest.json`; + + const app = new Hono(); + + cdn(app, { + authJwtSecret: secretKey, + authAdmissionJwtSecret: secretAdmissionKey, + blobStorage, + }); + + test('it returns a 401 if no Authorization header is provided', async () => { + const res = await app.request(requestPath, { + method: 'POST', + body: JSON.stringify({}), + }); + expect(res.status).toBe(401); + }); + + test('it returns a 401 if an invalid Authorization header is provided', async () => { + const res = await app.request(requestPath, { + method: 'POST', + headers: { + Authorization: `Bearer ${token.slice(0, -1)}}`, + }, + body: JSON.stringify({}), + }); + expect(res.status).toBe(401); + }); + + test('it returns a 400 if the graph or organization ids does not match with the JWT payload', async () => { + const res = await app.request(`/foo/bar/operations/manifest.json`, { + method: 'POST', + headers: { + Authorization: `Bearer ${token}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({}), + }); + expect(res.status).toBe(400); + }); + + test('it returns a 401 if the token has expired', async () => { + const token = await new SignJWT({ + organization_id: organizationId, + federated_graph_id: federatedGraphId, + exp: Math.floor(Date.now() / 1000) - 60, + }) + .setProtectedHeader({ alg: 
'HS256' }) + .sign(new TextEncoder().encode(secretKey)); + const res = await app.request(requestPath, { + method: 'POST', + headers: { + Authorization: `Bearer ${token}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({}), + }); + expect(res.status).toBe(401); + }); + + test('it returns the manifest on first request without revision', async () => { + const manifestContents = JSON.stringify({ + version: 1, + revision: 'abc123', + generatedAt: '2025-01-01T00:00:00.000Z', + operations: { + sha256hash1: 'query { hello }', + }, + }); + + blobStorage.objects.set(`${organizationId}/${federatedGraphId}/operations/manifest.json`, { + buffer: Buffer.from(manifestContents), + metadata: { version: 'abc123' }, + }); + + const res = await app.request(requestPath, { + method: 'POST', + headers: { + Authorization: `Bearer ${token}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({}), + }); + expect(res.status).toBe(200); + expect(res.headers.get('Content-Type')).toBe('application/json; charset=UTF-8'); + expect(res.headers.get('Cache-Control')).toBe('no-cache, no-store, must-revalidate'); + expect(await res.text()).toBe(manifestContents); + }); + + test('it returns 304 when revision matches the current revision', async () => { + blobStorage.objects.set(`${organizationId}/${federatedGraphId}/operations/manifest.json`, { + buffer: Buffer.from(JSON.stringify({ version: 1, revision: 'abc123', operations: {} })), + metadata: { version: 'abc123' }, + }); + + const res = await app.request(requestPath, { + method: 'POST', + headers: { + Authorization: `Bearer ${token}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ revision: 'abc123' }), + }); + expect(res.status).toBe(304); + }); + + test('it returns 200 when revision does not match the current revision', async () => { + const manifestContents = JSON.stringify({ + version: 1, + revision: 'def456', + generatedAt: '2025-01-01T00:00:00.000Z', + operations: { + sha256hash1: 'query { 
hello }', + }, + }); + + blobStorage.objects.set(`${organizationId}/${federatedGraphId}/operations/manifest.json`, { + buffer: Buffer.from(manifestContents), + metadata: { version: 'def456' }, + }); + + const res = await app.request(requestPath, { + method: 'POST', + headers: { + Authorization: `Bearer ${token}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ revision: 'old-revision' }), + }); + expect(res.status).toBe(200); + expect(await res.text()).toBe(manifestContents); + }); + + test('it returns a 404 if the manifest does not exist', async () => { + const otherBlobStorage = new InMemoryBlobStorage(); + const otherApp = new Hono(); + + cdn(otherApp, { + authJwtSecret: secretKey, + authAdmissionJwtSecret: secretAdmissionKey, + blobStorage: otherBlobStorage, + }); + + const res = await otherApp.request(requestPath, { + method: 'POST', + headers: { + Authorization: `Bearer ${token}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({}), + }); + expect(res.status).toBe(404); + }); + + test('it does not conflict with the individual persisted operations route', async () => { + const operationContents = JSON.stringify({ version: 1, body: 'query { hello }' }); + blobStorage.objects.set(`${organizationId}/${federatedGraphId}/operations/clientName/operation.json`, { + buffer: Buffer.from(operationContents), + }); + + const res = await app.request(`/${organizationId}/${federatedGraphId}/operations/clientName/operation.json`, { + method: 'GET', + headers: { + Authorization: `Bearer ${token}`, + }, + }); + expect(res.status).toBe(200); + expect(await res.text()).toBe(operationContents); + }); + }); + describe('schema check extensions handler', async () => { const organizationId = 'organizationId'; const checkId = randomUUID(); diff --git a/cdn-server/src/s3.ts b/cdn-server/src/s3.ts index 8c3db88334..81635e78d2 100644 --- a/cdn-server/src/s3.ts +++ b/cdn-server/src/s3.ts @@ -49,15 +49,7 @@ class S3BlobStorage implements BlobStorage { } 
} - async headObject({ - context, - key, - schemaVersionId, - }: { - context: Context; - key: string; - schemaVersionId: string; - }): Promise { + async headObject({ context, key, version }: { context: Context; key: string; version: string }): Promise { const command = new HeadObjectCommand({ Bucket: this.bucketName, Key: key, @@ -72,7 +64,7 @@ class S3BlobStorage implements BlobStorage { } else if (resp.$metadata.httpStatusCode !== 200) { throw new Error(`Failed to fetch the metadata of the object.`); } - if (resp.Metadata && resp.Metadata.version === schemaVersionId) { + if (resp.Metadata && resp.Metadata.version === version) { return false; } return true; diff --git a/controlplane/src/core/bufservices/persisted-operation/deletePersistedOperation.ts b/controlplane/src/core/bufservices/persisted-operation/deletePersistedOperation.ts index 4c74c270a5..446fffba01 100644 --- a/controlplane/src/core/bufservices/persisted-operation/deletePersistedOperation.ts +++ b/controlplane/src/core/bufservices/persisted-operation/deletePersistedOperation.ts @@ -5,13 +5,12 @@ import type { DeletePersistedOperationRequest, DeletePersistedOperationResponse, } from '@wundergraph/cosmo-connect/dist/platform/v1/platform_pb'; -import type { BlobStorage } from '../../blobstorage/index.js'; import { FederatedGraphRepository } from '../../repositories/FederatedGraphRepository.js'; import { UnauthorizedError } from '../../errors/errors.js'; import { OperationsRepository } from '../../repositories/OperationsRepository.js'; import type { RouterOptions } from '../../routes.js'; -import type { PersistedOperationWithClientDTO } from '../../../types/index.js'; import { enrichLogger, getLogger, handleError } from '../../util.js'; +import { generateAndUploadManifest } from './generateManifest.js'; import { createBlobStoragePath } from './utils.js'; export function deletePersistedOperation( @@ -76,9 +75,19 @@ export function deletePersistedOperation( }); try { - await opts.blobStorage.deleteObject({ 
- key: path, - }); + await Promise.all([ + opts.blobStorage.deleteObject({ + key: path, + }), + generateAndUploadManifest({ + db: opts.db, + federatedGraphId: federatedGraph.id, + organizationId: authContext.organizationId, + blobStorage: opts.blobStorage, + logger, + }), + ]); + return { response: { code: EnumStatusCode.OK, diff --git a/controlplane/src/core/bufservices/persisted-operation/generateManifest.ts b/controlplane/src/core/bufservices/persisted-operation/generateManifest.ts new file mode 100644 index 0000000000..b737e3bc16 --- /dev/null +++ b/controlplane/src/core/bufservices/persisted-operation/generateManifest.ts @@ -0,0 +1,61 @@ +import crypto from 'node:crypto'; +import { PostgresJsDatabase } from 'drizzle-orm/postgres-js'; +import { FastifyBaseLogger } from 'fastify'; +import * as schema from '../../../db/schema.js'; +import type { BlobStorage } from '../../blobstorage/index.js'; +import { OperationsRepository } from '../../repositories/OperationsRepository.js'; +import { createManifestBlobStoragePath } from './utils.js'; + +export interface PQLManifest { + version: 1; + revision: string; + generatedAt: string; + operations: Record; // sha256 hash -> operation body +} + +export async function generateAndUploadManifest(params: { + db: PostgresJsDatabase; + federatedGraphId: string; + organizationId: string; + blobStorage: BlobStorage; + logger: FastifyBaseLogger; +}): Promise<{ revision: string; operationCount: number }> { + const { db, federatedGraphId, organizationId, blobStorage, logger } = params; + + const operationsRepo = new OperationsRepository(db, federatedGraphId); + const allOperations = await operationsRepo.getAllPersistedOperationsForGraph(); + + const operations: Record = {}; + for (const op of allOperations) { + operations[op.hash] = op.operationContent; + } + + // Compute revision as SHA256 of the deterministic JSON serialization (sorted keys) + const sortedKeys = Object.keys(operations).sort(); + const sortedOperations: Record = {}; 
+ for (const key of sortedKeys) { + sortedOperations[key] = operations[key]; + } + const serialized = JSON.stringify(sortedOperations); + const revision = crypto.createHash('sha256').update(serialized).digest('hex'); + + const manifest: PQLManifest = { + version: 1, + revision, + generatedAt: new Date().toISOString(), + operations: sortedOperations, + }; + + const path = createManifestBlobStoragePath({ organizationId, fedGraphId: federatedGraphId }); + + await blobStorage.putObject({ + key: path, + body: Buffer.from(JSON.stringify(manifest), 'utf8'), + contentType: 'application/json; charset=utf-8', + metadata: { version: revision }, + }); + + logger.debug({ revision, operationCount: allOperations.length, path }, 'PQL manifest generated and uploaded'); + + return { revision, operationCount: allOperations.length }; +} diff --git a/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts b/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts index 1b646ed281..acf33b383b 100644 --- a/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts +++ b/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts @@ -18,6 +18,7 @@ import { OperationsRepository } from '../../repositories/OperationsRepository.js import type { RouterOptions } from '../../routes.js'; import { enrichLogger, extractOperationNames, getLogger, handleError } from '../../util.js'; import { UnauthorizedError } from '../../errors/errors.js'; +import { generateAndUploadManifest } from './generateManifest.js'; import { createBlobStoragePath } from './utils.js'; const MAX_PERSISTED_OPERATIONS = 100; @@ -263,6 +264,14 @@ export function publishPersistedOperations( await operationsRepo.updatePersistedOperations(clientId, userId, updatedOperations); + await generateAndUploadManifest({ + db: opts.db, + federatedGraphId: federatedGraph.id, + organizationId, + blobStorage: opts.blobStorage, + logger, + }); 
+ return { response: { code: EnumStatusCode.OK, diff --git a/controlplane/src/core/bufservices/persisted-operation/utils.ts b/controlplane/src/core/bufservices/persisted-operation/utils.ts index d58ce64da5..b814115eb3 100644 --- a/controlplane/src/core/bufservices/persisted-operation/utils.ts +++ b/controlplane/src/core/bufservices/persisted-operation/utils.ts @@ -9,3 +9,11 @@ export const createBlobStoragePath = ({ clientName: string; operationId: string; }): string => `${organizationId}/${fedGraphId}/operations/${clientName}/${operationId}.json`; + +export const createManifestBlobStoragePath = ({ + organizationId, + fedGraphId, +}: { + organizationId: string; + fedGraphId: string; +}): string => `${organizationId}/${fedGraphId}/operations/manifest.json`; diff --git a/controlplane/src/core/repositories/OperationsRepository.ts b/controlplane/src/core/repositories/OperationsRepository.ts index 9caf4db708..14c48f8d28 100644 --- a/controlplane/src/core/repositories/OperationsRepository.ts +++ b/controlplane/src/core/repositories/OperationsRepository.ts @@ -227,6 +227,36 @@ export class OperationsRepository { return result!.id; } + public async getAllPersistedOperationsForGraph(): Promise< + Array<{ + hash: string; + operationContent: string; + operationId: string; + operationNames: string[]; + clientName: string; + }> + > { + const results = await this.db + .select({ + hash: federatedGraphPersistedOperations.hash, + operationContent: federatedGraphPersistedOperations.operationContent, + operationId: federatedGraphPersistedOperations.operationId, + operationNames: federatedGraphPersistedOperations.operationNames, + clientName: federatedGraphClients.name, + }) + .from(federatedGraphPersistedOperations) + .innerJoin(federatedGraphClients, eq(federatedGraphClients.id, federatedGraphPersistedOperations.clientId)) + .where(eq(federatedGraphPersistedOperations.federatedGraphId, this.federatedGraphId)); + + return results.map((r) => ({ + hash: r.hash, + operationContent: 
r.operationContent ?? '', + operationId: r.operationId, + operationNames: r.operationNames ?? [], + clientName: r.clientName, + })); + } + public async getRegisteredClients(): Promise { const fedGraphClients = await this.db.query.federatedGraphClients.findMany({ where: eq(federatedGraphClients.federatedGraphId, this.federatedGraphId), diff --git a/controlplane/test/persisted-operations.test.ts b/controlplane/test/persisted-operations.test.ts index 653d4e18f0..acd3292b8c 100644 --- a/controlplane/test/persisted-operations.test.ts +++ b/controlplane/test/persisted-operations.test.ts @@ -241,14 +241,17 @@ describe('Persisted operations', (ctx) => { expect(publishOperationsResp.response?.code).toBe(EnumStatusCode.OK); const storageKeys = blobStorage.keys(); - expect(storageKeys.length).toBe(2); - const keyComponents = storageKeys[1].split('/'); + // 3 keys: routerconfig + operation + manifest + expect(storageKeys.length).toBe(3); + const operationKey = storageKeys.find((key) => key.includes(`/${id}.json`)); + expect(operationKey).toBeDefined(); + const keyComponents = operationKey!.split('/'); const keyFilename = keyComponents.at(-1)!; const keyBasename = keyFilename.split('.')[0]; expect(keyBasename).toBe(id); const blobObject = await blobStorage.getObject({ - key: storageKeys[1], + key: operationKey!, }); const text = await new Response(blobObject.stream).text(); expect(JSON.parse(text)).toEqual({ version: 1, body: query }); @@ -278,18 +281,20 @@ describe('Persisted operations', (ctx) => { expect(publishOperationsResp.response?.code).toBe(EnumStatusCode.OK); const storageKeys = blobStorage.keys(); - expect(storageKeys.length).toBe(2); + expect(storageKeys.length).toBe(3); // The client name should be escaped in the storage key - expect(storageKeys[1]).toContain(encodeURIComponent(clientName)); + const operationKey = storageKeys.find((key) => key.includes(`/${id}.json`)); + expect(operationKey).toBeDefined(); + 
expect(operationKey).toContain(encodeURIComponent(clientName)); - const keyComponents = storageKeys[1].split('/'); + const keyComponents = operationKey!.split('/'); const keyFilename = keyComponents.at(-1)!; const keyBasename = keyFilename.split('.')[0]; expect(keyBasename).toBe(id); const blobObject = await blobStorage.getObject({ - key: storageKeys[1], + key: operationKey!, }); const text = await new Response(blobObject.stream).text(); expect(JSON.parse(text)).toEqual({ version: 1, body: query }); @@ -325,7 +330,7 @@ describe('Persisted operations', (ctx) => { expect(publishOperationsResp.response?.code).toBe(EnumStatusCode.OK); - expect(blobStorage.keys().length).toBe(2); + expect(blobStorage.keys().length).toBe(3); const deleteFederatedGraphResp = await client.deleteFederatedGraph({ name: fedGraphName, @@ -540,6 +545,187 @@ describe('Persisted operations', (ctx) => { }); }); + describe('manifest generation', () => { + test('Should generate a PQL manifest after publishing persisted operations', async (testContext) => { + const { client, server, blobStorage } = await SetupTest({ + dbname, + chClient, + }); + testContext.onTestFinished(() => server.close()); + + const fedGraphName = genID('fedGraph'); + await setupFederatedGraph(fedGraphName, client); + + const query = `query { hello }`; + + const publishOperationsResp = await client.publishPersistedOperations({ + fedGraphName, + namespace: 'default', + clientName: 'test-client', + operations: [{ id: genID('hello'), contents: query }], + }); + + expect(publishOperationsResp.response?.code).toBe(EnumStatusCode.OK); + + const storageKeys = blobStorage.keys(); + const manifestKey = storageKeys.find((key) => key.endsWith('/operations/manifest.json')); + expect(manifestKey).toBeDefined(); + + const blobObject = await blobStorage.getObject({ key: manifestKey! 
}); + const text = await new Response(blobObject.stream).text(); + const manifest = JSON.parse(text); + + expect(manifest.version).toBe(1); + expect(manifest.revision).toBeDefined(); + expect(manifest.generatedAt).toBeDefined(); + expect(Object.keys(manifest.operations).length).toBe(1); + + const entry = Object.values(manifest.operations)[0] as string; + expect(entry).toBe(query); + }); + + test('Should include operations from multiple clients in the manifest', async (testContext) => { + const { client, server, blobStorage } = await SetupTest({ + dbname, + chClient, + }); + testContext.onTestFinished(() => server.close()); + + const fedGraphName = genID('fedGraph'); + await setupFederatedGraph(fedGraphName, client); + + const queryA = `query { hello }`; + const queryB = `query { world }`; + + const publishResp1 = await client.publishPersistedOperations({ + fedGraphName, + namespace: 'default', + clientName: 'client-a', + operations: [{ id: genID('op1'), contents: queryA }], + }); + expect(publishResp1.response?.code).toBe(EnumStatusCode.OK); + + const publishResp2 = await client.publishPersistedOperations({ + fedGraphName, + namespace: 'default', + clientName: 'client-b', + operations: [{ id: genID('op2'), contents: queryB }], + }); + expect(publishResp2.response?.code).toBe(EnumStatusCode.OK); + + const storageKeys = blobStorage.keys(); + const manifestKey = storageKeys.find((key) => key.endsWith('/operations/manifest.json')); + expect(manifestKey).toBeDefined(); + + const blobObject = await blobStorage.getObject({ key: manifestKey! 
}); + const text = await new Response(blobObject.stream).text(); + const manifest = JSON.parse(text); + + expect(Object.keys(manifest.operations).length).toBe(2); + + const bodies = Object.values(manifest.operations) as string[]; + expect(bodies).toContain(queryA); + expect(bodies).toContain(queryB); + }); + + test('Should regenerate the manifest after deleting a persisted operation', async (testContext) => { + const { client, server, blobStorage } = await SetupTest({ + dbname, + chClient, + }); + testContext.onTestFinished(() => server.close()); + + const fedGraphName = genID('fedGraph'); + await setupFederatedGraph(fedGraphName, client); + + const query1 = `query { hello }`; + const query2 = `query { world }`; + const op1Id = genID('op1'); + const op2Id = genID('op2'); + + const publishResp = await client.publishPersistedOperations({ + fedGraphName, + namespace: 'default', + clientName: 'test-client', + operations: [ + { id: op1Id, contents: query1 }, + { id: op2Id, contents: query2 }, + ], + }); + expect(publishResp.response?.code).toBe(EnumStatusCode.OK); + + // Verify manifest has 2 operations + let storageKeys = blobStorage.keys(); + let manifestKey = storageKeys.find((key) => key.endsWith('/operations/manifest.json')); + let blobObject = await blobStorage.getObject({ key: manifestKey! 
}); + let text = await new Response(blobObject.stream).text(); + let manifest = JSON.parse(text); + expect(Object.keys(manifest.operations).length).toBe(2); + const revisionBefore = manifest.revision; + + // Delete one operation + const deleteResp = await client.deletePersistedOperation({ + fedGraphName, + namespace: 'default', + operationId: publishResp.operations[0].id, + clientName: 'test-client', + }); + expect(deleteResp.response?.code).toBe(EnumStatusCode.OK); + + // Verify manifest now has 1 operation with a new revision + storageKeys = blobStorage.keys(); + manifestKey = storageKeys.find((key) => key.endsWith('/operations/manifest.json')); + blobObject = await blobStorage.getObject({ key: manifestKey! }); + text = await new Response(blobObject.stream).text(); + manifest = JSON.parse(text); + expect(Object.keys(manifest.operations).length).toBe(1); + expect(manifest.revision).not.toBe(revisionBefore); + }); + + test('Should produce a deterministic revision for the same set of operations', async (testContext) => { + const { client, server, blobStorage } = await SetupTest({ + dbname, + chClient, + }); + testContext.onTestFinished(() => server.close()); + + const fedGraphName = genID('fedGraph'); + await setupFederatedGraph(fedGraphName, client); + + const query = `query { hello }`; + + const publishResp = await client.publishPersistedOperations({ + fedGraphName, + namespace: 'default', + clientName: 'test-client', + operations: [{ id: genID('hello'), contents: query }], + }); + expect(publishResp.response?.code).toBe(EnumStatusCode.OK); + + const storageKeys = blobStorage.keys(); + const manifestKey = storageKeys.find((key) => key.endsWith('/operations/manifest.json')); + const blobObject1 = await blobStorage.getObject({ key: manifestKey! 
}); + const text1 = await new Response(blobObject1.stream).text(); + const manifest1 = JSON.parse(text1); + + // Publish the same operations again (will be UP_TO_DATE), which still triggers manifest regen + const publishResp2 = await client.publishPersistedOperations({ + fedGraphName, + namespace: 'default', + clientName: 'test-client', + operations: [{ id: publishResp.operations[0].id, contents: query }], + }); + expect(publishResp2.response?.code).toBe(EnumStatusCode.OK); + + const blobObject2 = await blobStorage.getObject({ key: manifestKey! }); + const text2 = await new Response(blobObject2.stream).text(); + const manifest2 = JSON.parse(text2); + + // Same operations should produce the same revision + expect(manifest2.revision).toBe(manifest1.revision); + }); + }); + describe('check', () => { test('Should check the traffic of the operation', async (testContext) => { const { client, server } = await SetupTest({ diff --git a/router-tests/pql_manifest_test.go b/router-tests/pql_manifest_test.go new file mode 100644 index 0000000000..6d7d8d3f55 --- /dev/null +++ b/router-tests/pql_manifest_test.go @@ -0,0 +1,384 @@ +package integration + +import ( + "encoding/json" + "io" + "net/http" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" + "github.com/wundergraph/cosmo/router-tests/testenv" + "github.com/wundergraph/cosmo/router/core" + "github.com/wundergraph/cosmo/router/pkg/config" + "go.uber.org/zap/zapcore" +) + +func TestPQLManifest(t *testing.T) { + t.Parallel() + + expectedEmployeesBody := `{"data":{"employees":[{"id":1},{"id":2},{"id":3},{"id":4},{"id":5},{"id":7},{"id":8},{"id":10},{"id":11},{"id":12}]}}` + persistedNotFoundResp := `{"errors":[{"message":"PersistedQueryNotFound","extensions":{"code":"PERSISTED_QUERY_NOT_FOUND"}}]}` + + manifestConfig := config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + } + + t.Run("lookup 
succeeds for known operations", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(manifestConfig), + }, + LogObservation: testenv.LogObservationConfig{ + Enabled: true, + LogLevel: zapcore.InfoLevel, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + + // Verify startup log + logEntries := xEnv.Observer().FilterMessageSnippet("Loaded initial PQL manifest").All() + require.Len(t, logEntries, 1) + }) + }) + + t.Run("rejects unknown operation hash", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(manifestConfig), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "0000000000000000000000000000000000000000000000000000000000000000"}}`), + }) + require.Equal(t, persistedNotFoundResp, res.Body) + }) + }) + + t.Run("no CDN requests for individual operations", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(manifestConfig), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + // Make multiple requests + for i := 0; i < 3; i++ { + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: 
[]byte(`{"persistedQuery": {"version": 1, "sha256Hash": "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + } + + // Check CDN request log - only manifest request, no individual operation requests + resp, err := http.Get(xEnv.CDN.URL) + require.NoError(t, err) + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + var cdnRequests []string + err = json.Unmarshal(body, &cdnRequests) + require.NoError(t, err) + + // Should have manifest request(s) but no individual operation requests + for _, req := range cdnRequests { + require.False(t, strings.Contains(req, "/operations/my-client/"), + "expected no individual operation CDN requests, but got: %s", req) + } + }) + }) + + t.Run("safelist with manifest allows known queries", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + Safelist: config.SafelistConfiguration{Enabled: true}, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Header: header, + Query: "query Employees {\n employees {\n id\n }\n}", + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + }) + }) + + t.Run("safelist with manifest rejects unknown queries", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * 
time.Second, + }, + Safelist: config.SafelistConfiguration{Enabled: true}, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Header: header, + Query: "query Employees {\n\n\n employees {\n id\n }\n}", + }) + require.NoError(t, err) + require.Equal(t, persistedNotFoundResp, res.Body) + }) + }) + + t.Run("log_unknown with manifest logs and allows unknown queries", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + LogUnknown: true, + }), + }, + LogObservation: testenv.LogObservationConfig{ + Enabled: true, + LogLevel: zapcore.InfoLevel, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + nonPersistedQuery := "query Employees {\n\n\n employees {\n id\n }\n}" + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Header: header, + Query: nonPersistedQuery, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + + logEntries := xEnv.Observer().FilterMessageSnippet("Unknown persisted operation found").All() + require.Len(t, logEntries, 1) + requestContext := logEntries[0].ContextMap() + require.Equal(t, nonPersistedQuery, requestContext["query"]) + }) + }) + + t.Run("without manifest CDN is used for individual operations", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{}, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + res, err := 
xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + + // Verify CDN was hit for the individual operation + resp, err := http.Get(xEnv.CDN.URL) + require.NoError(t, err) + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + var cdnRequests []string + err = json.Unmarshal(body, &cdnRequests) + require.NoError(t, err) + + // CDN should have been called for the individual operation, not the manifest + hasOperationRequest := false + hasManifestRequest := false + for _, req := range cdnRequests { + if strings.Contains(req, "/operations/my-client/") { + hasOperationRequest = true + } + if strings.Contains(req, "/operations/manifest.json") { + hasManifestRequest = true + } + } + require.True(t, hasOperationRequest, "CDN should be called for individual operations when manifest is disabled") + require.False(t, hasManifestRequest, "CDN should not fetch manifest when manifest is disabled") + }) + }) + + t.Run("without manifest safelist still uses CDN", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Safelist: config.SafelistConfiguration{Enabled: true}, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + // Known persisted query should succeed via CDN lookup + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Header: header, + Query: "query Employees {\n employees {\n id\n }\n}", + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + + // Unknown query should be 
rejected + res, err = xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Header: header, + Query: "query Employees {\n\n\n employees {\n id\n }\n}", + }) + require.NoError(t, err) + require.Equal(t, persistedNotFoundResp, res.Body) + + // Verify CDN was hit for individual operations, not manifest + resp, err := http.Get(xEnv.CDN.URL) + require.NoError(t, err) + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + var cdnRequests []string + err = json.Unmarshal(body, &cdnRequests) + require.NoError(t, err) + + hasOperationRequest := false + hasManifestRequest := false + for _, req := range cdnRequests { + if strings.Contains(req, "/operations/my-client/") { + hasOperationRequest = true + } + if strings.Contains(req, "/operations/manifest.json") { + hasManifestRequest = true + } + } + require.True(t, hasOperationRequest, "CDN should be called for individual operations when manifest is disabled") + require.False(t, hasManifestRequest, "CDN should not fetch manifest when manifest is disabled") + }) + }) + + t.Run("without manifest log_unknown still uses CDN", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + LogUnknown: true, + }), + }, + LogObservation: testenv.LogObservationConfig{ + Enabled: true, + LogLevel: zapcore.InfoLevel, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + nonPersistedQuery := "query Employees {\n\n\n employees {\n id\n }\n}" + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + // Unknown query should be logged but allowed + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Header: header, + Query: nonPersistedQuery, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + + logEntries := 
xEnv.Observer().FilterMessageSnippet("Unknown persisted operation found").All() + require.Len(t, logEntries, 1) + + // Verify CDN was used, not manifest + resp, err := http.Get(xEnv.CDN.URL) + require.NoError(t, err) + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + var cdnRequests []string + err = json.Unmarshal(body, &cdnRequests) + require.NoError(t, err) + + hasManifestRequest := false + for _, req := range cdnRequests { + if strings.Contains(req, "/operations/manifest.json") { + hasManifestRequest = true + } + } + require.False(t, hasManifestRequest, "CDN should not fetch manifest when manifest is disabled") + }) + }) + + t.Run("log_unknown with safelist and manifest logs and rejects unknown queries", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + LogUnknown: true, + Safelist: config.SafelistConfiguration{Enabled: true}, + }), + }, + LogObservation: testenv.LogObservationConfig{ + Enabled: true, + LogLevel: zapcore.InfoLevel, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + nonPersistedQuery := "query Employees {\n\n\n employees {\n id\n }\n}" + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Header: header, + Query: nonPersistedQuery, + }) + require.NoError(t, err) + require.Equal(t, persistedNotFoundResp, res.Body) + + logEntries := xEnv.Observer().FilterMessageSnippet("Unknown persisted operation found").All() + require.Len(t, logEntries, 1) + requestContext := logEntries[0].ContextMap() + require.Equal(t, nonPersistedQuery, requestContext["query"]) + }) + }) +} diff --git 
a/router-tests/testenv/testdata/cdn/organization/graph/operations/manifest.json b/router-tests/testenv/testdata/cdn/organization/graph/operations/manifest.json new file mode 100644 index 0000000000..711eb32d4f --- /dev/null +++ b/router-tests/testenv/testdata/cdn/organization/graph/operations/manifest.json @@ -0,0 +1,10 @@ +{ + "version": 1, + "revision": "test-revision-001", + "generatedAt": "2024-01-01T00:00:00Z", + "operations": { + "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f": "query Employees {\n employees {\n id\n }\n}", + "33651da3d80e420709520fb900c7ab8ec4151555da56062feeee428cf7f3a5dd": "query Employees {\n employees {\n id\n }\n}", + "9015ddfadd802bb378a14e48cea51e9bf9a07c7f8a71d85c56d7b104fea84937": "query Employees {\n employees {\n id\n }\n}" + } +} diff --git a/router/core/router.go b/router/core/router.go index dd6d14e17d..f494ea63d2 100644 --- a/router/core/router.go +++ b/router/core/router.go @@ -37,6 +37,7 @@ import ( "github.com/wundergraph/cosmo/router/internal/persistedoperation/operationstorage/cdn" "github.com/wundergraph/cosmo/router/internal/persistedoperation/operationstorage/fs" "github.com/wundergraph/cosmo/router/internal/persistedoperation/operationstorage/s3" + "github.com/wundergraph/cosmo/router/internal/persistedoperation/pqlmanifest" rd "github.com/wundergraph/cosmo/router/internal/rediscloser" "github.com/wundergraph/cosmo/router/internal/retrytransport" "github.com/wundergraph/cosmo/router/internal/stringsx" @@ -1092,7 +1093,7 @@ func (r *Router) bootstrap(ctx context.Context) error { r.staticExecutionConfig = executionConfig } - if err := r.buildClients(); err != nil { + if err := r.buildClients(ctx); err != nil { return err } @@ -1115,7 +1116,7 @@ func (r *Router) bootstrap(ctx context.Context) error { } // buildClients initializes the storage clients for persisted operations and router config. 
-func (r *Router) buildClients() error { +func (r *Router) buildClients(ctx context.Context) error { s3Providers := map[string]config.S3StorageProvider{} cdnProviders := map[string]config.CDNStorageProvider{} redisProviders := map[string]config.RedisStorageProvider{} @@ -1247,7 +1248,38 @@ func (r *Router) buildClients() error { } } - if pClient != nil || apqClient != nil { + var pqlStore *pqlmanifest.Store + + if r.persistedOperationsConfig.Manifest.Enabled { + if r.graphApiToken == "" { + return errors.New("graph token is required for PQL manifest") + } + + fetcher, err := pqlmanifest.NewFetcher(r.cdnConfig.URL, r.graphApiToken, r.logger) + if err != nil { + return fmt.Errorf("failed to create PQL manifest fetcher: %w", err) + } + + pqlStore = pqlmanifest.NewStore(r.logger) + + poller := pqlmanifest.NewPoller( + fetcher, pqlStore, + r.persistedOperationsConfig.Manifest.PollInterval, + r.persistedOperationsConfig.Manifest.PollJitter, + r.logger, + ) + + if err := poller.FetchInitial(ctx); err != nil { + r.logger.Warn("Failed to fetch initial PQL manifest, will retry on next poll", zap.Error(err)) + } + + go poller.Poll(ctx) + + // When manifest is enabled, do not use CDN fetches for individual operations + pClient = nil + } + + if pClient != nil || apqClient != nil || pqlStore != nil { // For backwards compatibility with cdn config field cacheSize := r.persistedOperationsConfig.Cache.Size.Uint64() if cacheSize <= 0 { @@ -1259,6 +1291,7 @@ func (r *Router) buildClients() error { Logger: r.logger, ProviderClient: pClient, ApqClient: apqClient, + PQLStore: pqlStore, }) if err != nil { return err diff --git a/router/internal/persistedoperation/client.go b/router/internal/persistedoperation/client.go index 2028f67941..568a8d2130 100644 --- a/router/internal/persistedoperation/client.go +++ b/router/internal/persistedoperation/client.go @@ -7,6 +7,7 @@ import ( "github.com/wundergraph/cosmo/router/internal/persistedoperation/apq" 
"github.com/wundergraph/cosmo/router/internal/persistedoperation/operationstorage" + "github.com/wundergraph/cosmo/router/internal/persistedoperation/pqlmanifest" "go.uber.org/zap" ) @@ -37,12 +38,14 @@ type Options struct { ProviderClient StorageClient ApqClient apq.Client + PQLStore *pqlmanifest.Store } type Client struct { cache *operationstorage.OperationsCache providerClient StorageClient apqClient apq.Client + pqlStore *pqlmanifest.Store } func NewClient(opts *Options) (*Client, error) { @@ -57,6 +60,7 @@ func NewClient(opts *Options) (*Client, error) { providerClient: opts.ProviderClient, cache: cache, apqClient: opts.ApqClient, + pqlStore: opts.PQLStore, }, nil } @@ -72,6 +76,21 @@ func (c *Client) PersistedOperation(ctx context.Context, clientName string, sha2 return data, false, nil } + // PQL manifest check (local, no network) + if c.pqlStore != nil && c.pqlStore.IsLoaded() { + if body, found := c.pqlStore.LookupByHash(sha256Hash); found { + c.cache.Set(clientName, sha256Hash, body, 0) + return body, false, nil + } + // Manifest is authoritative — operation not found + if c.apqClient != nil { + return nil, true, nil + } + return nil, false, &PersistentOperationNotFoundError{ + ClientName: clientName, Sha256Hash: sha256Hash, + } + } + if c.providerClient == nil { // This can happen if we are using APQ client, without any persisted operation client. Otherwise, we should have a provider client and shouldn't reach here. 
return nil, c.apqClient != nil, nil diff --git a/router/internal/persistedoperation/pqlmanifest/fetcher.go b/router/internal/persistedoperation/pqlmanifest/fetcher.go new file mode 100644 index 0000000000..3d26cede48 --- /dev/null +++ b/router/internal/persistedoperation/pqlmanifest/fetcher.go @@ -0,0 +1,144 @@ +package pqlmanifest + +import ( + "bytes" + "compress/gzip" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + + "github.com/wundergraph/cosmo/router/internal/httpclient" + "github.com/wundergraph/cosmo/router/internal/jwt" + "go.uber.org/zap" +) + +type manifestRequestBody struct { + Revision string `json:"revision,omitempty"` +} + +type Fetcher struct { + cdnURL *url.URL + authenticationToken string + // federatedGraphID is the ID of the federated graph that was obtained + // from the token, already url-escaped + federatedGraphID string + // organizationID is the ID of the organization for this graph that was obtained + // from the token, already url-escaped + organizationID string + httpClient *http.Client + logger *zap.Logger +} + +// NewFetcher creates a new manifest fetcher. It reuses JWT extraction and HTTP client +// setup patterns from the CDN persisted operations client. 
+func NewFetcher(endpoint, token string, logger *zap.Logger) (*Fetcher, error) { + u, err := url.Parse(endpoint) + if err != nil { + return nil, fmt.Errorf("invalid CDN URL %q: %w", endpoint, err) + } + + claims, err := jwt.ExtractFederatedGraphTokenClaims(token) + if err != nil { + return nil, err + } + + if logger == nil { + logger = zap.NewNop() + } + + logger = logger.With( + zap.String("component", "pql_manifest_fetcher"), + zap.String("url", endpoint), + ) + + return &Fetcher{ + cdnURL: u, + authenticationToken: token, + federatedGraphID: url.PathEscape(claims.FederatedGraphID), + organizationID: url.PathEscape(claims.OrganizationID), + httpClient: httpclient.NewRetryableHTTPClient(logger), + logger: logger, + }, nil +} + +// Fetch downloads the manifest from the CDN. It POSTs to /{orgId}/{fedGraphId}/operations/manifest.json +// with Bearer auth, sending the current revision in the request body. The CDN returns 304 Not Modified +// when the revision matches, avoiding a full download. Returns (manifest, changed, err). 
// Fetch downloads the manifest from the CDN. It POSTs to
// /{orgId}/{fedGraphId}/operations/manifest.json with Bearer auth, sending the
// current revision in the request body. The CDN responds 304 Not Modified when
// the revision matches, avoiding a full download.
// Returns (manifest, changed, err): changed is false for 304 responses.
func (f *Fetcher) Fetch(ctx context.Context, currentRevision string) (*Manifest, bool, error) {
	// IDs were path-escaped in NewFetcher, so the path can be built directly.
	manifestPath := fmt.Sprintf("/%s/%s/operations/manifest.json", f.organizationID, f.federatedGraphID)
	manifestURL := f.cdnURL.ResolveReference(&url.URL{Path: manifestPath})

	// An empty currentRevision is omitted from the body (omitempty); the CDN
	// then always returns the full manifest (the initial-fetch case).
	reqBody, err := json.Marshal(manifestRequestBody{
		Revision: currentRevision,
	})
	if err != nil {
		return nil, false, fmt.Errorf("could not marshal request body: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, "POST", manifestURL.String(), bytes.NewReader(reqBody))
	if err != nil {
		return nil, false, err
	}

	req.Header.Set("Content-Type", "application/json; charset=UTF-8")
	req.Header.Add("Authorization", "Bearer "+f.authenticationToken)
	// Setting Accept-Encoding explicitly disables Go's transparent gzip
	// decompression, so the gzip handling is done manually further down.
	req.Header.Set("Accept-Encoding", "gzip")

	resp, err := f.httpClient.Do(req)
	if err != nil {
		return nil, false, err
	}
	defer func() {
		_ = resp.Body.Close()
	}()

	// 304: the CDN's revision matches currentRevision — nothing to parse.
	if resp.StatusCode == http.StatusNotModified {
		return nil, false, nil
	}

	if resp.StatusCode != http.StatusOK {
		// Map well-known failure modes to stable, descriptive errors.
		if resp.StatusCode == http.StatusNotFound {
			return nil, false, errors.New("PQL manifest not found on CDN")
		}
		if resp.StatusCode == http.StatusUnauthorized {
			return nil, false, errors.New("could not authenticate against CDN")
		}
		if resp.StatusCode == http.StatusBadRequest {
			return nil, false, errors.New("bad request")
		}
		return nil, false, fmt.Errorf("unexpected status code when loading PQL manifest, statusCode: %d", resp.StatusCode)
	}

	var reader io.Reader = resp.Body

	// Only decompress when the server actually responded with gzip; the CDN
	// may send an identity-encoded body despite the Accept-Encoding header.
	if resp.Header.Get("Content-Encoding") == "gzip" {
		r, err := gzip.NewReader(resp.Body)
		if err != nil {
			return nil, false, fmt.Errorf("could not create gzip reader: %w", err)
		}
		defer func() {
			_ = r.Close()
		}()
		reader = r
	}

	body, err := io.ReadAll(reader)
	if err != nil {
		return nil, false, fmt.Errorf("could not read response body: %w", err)
	}

	// A 200 with an empty body is treated as an error rather than an empty
	// manifest, so a broken CDN response cannot silently clear the store.
	if len(body) == 0 {
		return nil, false, errors.New("empty response body")
	}

	var manifest Manifest
	if err := json.Unmarshal(body, &manifest); err != nil {
		return nil, false, fmt.Errorf("could not unmarshal PQL manifest: %w", err)
	}

	return &manifest, true, nil
}
+func (p *Poller) Poll(ctx context.Context) { + for { + jitter := time.Duration(rand.Int63n(int64(p.pollJitter + 1))) + sleepDuration := p.pollInterval + jitter + + select { + case <-ctx.Done(): + return + case <-time.After(sleepDuration): + } + + currentRevision := p.store.Revision() + manifest, changed, err := p.fetcher.Fetch(ctx, currentRevision) + if err != nil { + p.logger.Warn("Failed to fetch PQL manifest", zap.Error(err)) + continue + } + + if changed && manifest != nil { + p.store.Load(manifest) + p.logger.Info("Updated PQL manifest", + zap.String("revision", manifest.Revision), + zap.String("previous_revision", currentRevision), + zap.Int("operation_count", len(manifest.Operations)), + ) + } + } +} diff --git a/router/internal/persistedoperation/pqlmanifest/store.go b/router/internal/persistedoperation/pqlmanifest/store.go new file mode 100644 index 0000000000..d330740f4a --- /dev/null +++ b/router/internal/persistedoperation/pqlmanifest/store.go @@ -0,0 +1,81 @@ +package pqlmanifest + +import ( + "sync" + + "go.uber.org/zap" +) + +type Manifest struct { + Version int `json:"version"` + Revision string `json:"revision"` + GeneratedAt string `json:"generatedAt"` + Operations map[string]string `json:"operations"` // sha256 hash -> operation body +} + +type Store struct { + mu sync.RWMutex + manifest *Manifest + logger *zap.Logger +} + +func NewStore(logger *zap.Logger) *Store { + return &Store{ + logger: logger, + } +} + +// Load write-locks and swaps the manifest atomically. +func (s *Store) Load(manifest *Manifest) { + s.mu.Lock() + defer s.mu.Unlock() + s.manifest = manifest +} + +// LookupByHash read-locks and performs an O(1) map lookup by sha256 hash. 
+func (s *Store) LookupByHash(sha256Hash string) (body []byte, found bool) { + s.mu.RLock() + defer s.mu.RUnlock() + + if s.manifest == nil { + return nil, false + } + + op, ok := s.manifest.Operations[sha256Hash] + if !ok { + return nil, false + } + + return []byte(op), true +} + +// IsLoaded returns whether a manifest has been loaded. +func (s *Store) IsLoaded() bool { + s.mu.RLock() + defer s.mu.RUnlock() + return s.manifest != nil +} + +// Revision returns the current manifest revision for polling. +func (s *Store) Revision() string { + s.mu.RLock() + defer s.mu.RUnlock() + + if s.manifest == nil { + return "" + } + + return s.manifest.Revision +} + +// OperationCount returns the number of operations in the manifest. +func (s *Store) OperationCount() int { + s.mu.RLock() + defer s.mu.RUnlock() + + if s.manifest == nil { + return 0 + } + + return len(s.manifest.Operations) +} diff --git a/router/pkg/config/config.go b/router/pkg/config/config.go index e40812f34a..cda12ce30b 100644 --- a/router/pkg/config/config.go +++ b/router/pkg/config/config.go @@ -927,12 +927,19 @@ type AutomaticPersistedQueriesCacheConfig struct { TTL int `yaml:"ttl" env:"APQ_CACHE_TTL" envDefault:"-1"` } +type PQLManifestConfig struct { + Enabled bool `yaml:"enabled" envDefault:"false" env:"ENABLED"` + PollInterval time.Duration `yaml:"poll_interval" envDefault:"10s" env:"POLL_INTERVAL"` + PollJitter time.Duration `yaml:"poll_jitter" envDefault:"5s" env:"POLL_JITTER"` +} + type PersistedOperationsConfig struct { Disabled bool `yaml:"disabled" env:"DISABLED" envDefault:"false"` LogUnknown bool `yaml:"log_unknown" env:"LOG_UNKNOWN" envDefault:"false"` Safelist SafelistConfiguration `yaml:"safelist" envPrefix:"SAFELIST_"` Cache PersistedOperationsCacheConfig `yaml:"cache"` Storage PersistedOperationsStorageConfig `yaml:"storage"` + Manifest PQLManifestConfig `yaml:"manifest" envPrefix:"MANIFEST_"` } type SafelistConfiguration struct { diff --git a/router/pkg/config/config.schema.json 
b/router/pkg/config/config.schema.json index 6cef0fc2e2..3640470904 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -6,9 +6,7 @@ "version": { "type": "string", "description": "The version of the configuration file. This is used to ensure that the configuration file is compatible.", - "enum": [ - "1" - ] + "enum": ["1"] }, "instance_id": { "type": "string", @@ -39,10 +37,7 @@ "type": "array", "items": { "type": "object", - "required": [ - "id", - "url" - ], + "required": ["id", "url"], "additionalProperties": false, "properties": { "id": { @@ -61,10 +56,7 @@ "type": "array", "items": { "type": "object", - "required": [ - "id", - "urls" - ], + "required": ["id", "urls"], "additionalProperties": false, "properties": { "id": { @@ -91,11 +83,7 @@ "description": "The configuration for the S3 storage provider. If no access key and secret key are provided, the provider will attempt to retrieve IAM credentials from the EC2 service.", "items": { "type": "object", - "required": [ - "id", - "bucket", - "endpoint" - ], + "required": ["id", "bucket", "endpoint"], "additionalProperties": false, "properties": { "id": { @@ -134,10 +122,7 @@ "description": "The file system configuration. The file system provider is used to store and retrieve data from the local file system.", "items": { "type": "object", - "required": [ - "id", - "path" - ], + "required": ["id", "path"], "additionalProperties": false, "properties": { "id": { @@ -196,10 +181,7 @@ }, "storage": { "description": "The storage provider for persisted operation. Only one provider can be active. When no provider is specified, the router will fallback to the Cosmo CDN provider to download the persisted operations.", - "required": [ - "provider_id", - "object_prefix" - ], + "required": ["provider_id", "object_prefix"], "properties": { "provider_id": { "description": "The ID of the storage provider. 
The ID must match the ID of the storage provider in the storage_providers section.", @@ -210,6 +192,36 @@ "description": "The prefix of the object in the storage provider location. The prefix is put in front of the operation SHA256 hash. //.json" } } + }, + "manifest": { + "type": "object", + "additionalProperties": false, + "description": "The configuration for the PQL manifest. When enabled, the router downloads the full persisted operations manifest from the CDN and serves operations from memory.", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable the PQL manifest feature.", + "default": false + }, + "poll_interval": { + "type": "string", + "format": "go-duration", + "description": "The interval at which the router polls the CDN for manifest updates. The period is specified as a string with a number and a unit, e.g. 10s, 1m, 1h. Minimum is 10s.", + "default": "10s", + "duration": { + "minimum": "10s" + } + }, + "poll_jitter": { + "type": "string", + "format": "go-duration", + "description": "The maximum random jitter added to each poll interval. The period is specified as a string with a number and a unit, e.g. 1s, 5s, 10s. Minimum is 1s.", + "default": "5s", + "duration": { + "minimum": "1s" + } + } + } } } }, @@ -217,9 +229,7 @@ "type": "object", "additionalProperties": false, "description": "The configuration for the automatic persisted queries (APQ).", - "required": [ - "enabled" - ], + "required": ["enabled"], "properties": { "enabled": { "type": "boolean", @@ -248,10 +258,7 @@ }, "storage": { "description": "The storage provider for automatic persisted operation. Only one provider can be active. When no provider is specified, the router will use a local in-memory cache for retaining APQ queries", - "required": [ - "provider_id", - "object_prefix" - ], + "required": ["provider_id", "object_prefix"], "properties": { "provider_id": { "description": "The ID of the storage provider. 
The ID must match the ID of the storage provider in the storage_providers section.", @@ -277,9 +284,7 @@ "type": "object", "description": "The configuration for the execution config file. The config file is used to load the execution config from the local file system. The file has precedence over the storage provider.", "additionalProperties": false, - "required": [ - "path" - ], + "required": ["path"], "dependentSchemas": { "watch_interval": { "properties": { @@ -318,10 +323,7 @@ "properties": { "storage": { "description": "The storage provider for the execution config. Only one provider can be active. When no provider is specified, the router will fallback to the Cosmo CDN provider to download the execution config. Updating the execution config is happening in the background without downtime.", - "required": [ - "provider_id", - "object_path" - ], + "required": ["provider_id", "object_path"], "properties": { "provider_id": { "description": "The ID of the storage provider. The ID must match the ID of the storage provider in the storage_providers section.", @@ -341,9 +343,7 @@ "properties": { "fallback_storage": { "description": "The fallback storage provider for the execution config in case the primary one fails.", - "required": [ - "enabled" - ], + "required": ["enabled"], "properties": { "enabled": { "type": "boolean", @@ -411,9 +411,7 @@ "type": "object", "description": "The configuration for the client authentication. The client authentication is used to authenticate the clients using the provided certificate.", "additionalProperties": false, - "required": [ - "cert_file" - ], + "required": ["cert_file"], "properties": { "required": { "type": "boolean", @@ -436,10 +434,7 @@ } }, "then": { - "required": [ - "cert_file", - "key_file" - ] + "required": ["cert_file", "key_file"] } }, "client": { @@ -554,9 +549,7 @@ "allow_list": { "type": "array", "description": "The names of the headers to forward. 
The default value is 'Authorization'.", - "default": [ - "Authorization" - ], + "default": ["Authorization"], "items": { "type": "string" } @@ -575,9 +568,7 @@ "allow_list": { "type": "array", "description": "The names of the query parameters to forward. The default value is 'Authorization'.", - "default": [ - "Authorization" - ], + "default": ["Authorization"], "items": { "type": "string" } @@ -692,10 +683,7 @@ "type": "string", "default": "redact", "description": "The method used to anonymize the IP addresses. The supported methods are 'redact' and 'hash'. The default value is 'redact'. The 'redact' method replaces the IP addresses with the string '[REDACTED]'. The 'hash' method hashes the IP addresses using the SHA-256 algorithm.", - "enum": [ - "redact", - "hash" - ] + "enum": ["redact", "hash"] } } } @@ -740,14 +728,7 @@ }, "level": { "type": "string", - "enum": [ - "debug", - "info", - "warn", - "error", - "panic", - "fatal" - ], + "enum": ["debug", "info", "warn", "error", "panic", "fatal"], "description": "The log level for access logs. The log level is used to control the verbosity of the access logs. The default value is 'info'.", "default": "info" }, @@ -830,9 +811,7 @@ "ignore_query_params_list": { "type": "array", "description": "List of query params to be ignored from being logged in the query field.", - "default": [ - "variables" - ], + "default": ["variables"], "items": { "type": "string" } @@ -905,10 +884,7 @@ "items": { "type": "object", "additionalProperties": false, - "required": [ - "key", - "value" - ], + "required": ["key", "value"], "properties": { "key": { "type": "string", @@ -928,9 +904,7 @@ "type": "object", "description": "The configuration for custom attributes. Custom attributes can be created from request headers or static values. 
Keep in mind, that every new custom attribute increases the cardinality of the pipeline.", "additionalProperties": false, - "required": [ - "key" - ], + "required": ["key"], "properties": { "key": { "type": "string", @@ -956,14 +930,10 @@ }, "oneOf": [ { - "required": [ - "request_header" - ] + "required": ["request_header"] }, { - "required": [ - "expression" - ] + "required": ["expression"] } ] } @@ -1010,9 +980,7 @@ "description": "The exporters to use to export the traces. If no exporters are specified, the default Cosmo Cloud exporter is used. If you override, please make sure to include the default exporter.", "items": { "type": "object", - "required": [ - "endpoint" - ], + "required": ["endpoint"], "additionalProperties": false, "properties": { "disabled": { @@ -1022,10 +990,7 @@ "type": "string", "description": "The exporter to use for the traces. The supported exporters are 'http' and 'grpc'.", "default": "http", - "enum": [ - "http", - "grpc" - ] + "enum": ["http", "grpc"] }, "endpoint": { "type": "string" @@ -1112,9 +1077,7 @@ "type": "object", "description": "The configuration for custom span attributes for subgraph tracing.", "additionalProperties": false, - "required": [ - "key" - ], + "required": ["key"], "properties": { "key": { "type": "string", @@ -1132,9 +1095,7 @@ }, "oneOf": [ { - "required": [ - "expression" - ] + "required": ["expression"] } ] } @@ -1178,9 +1139,7 @@ "type": "object", "description": "The configuration for custom attributes. Custom attributes can be created from request headers, static values or context fields. Not every context fields are available at all request life-cycle stages. If a value is a list, the value is JSON encoded for OTLP. For Prometheus, the values are exploded into multiple metrics with unique labels. 
Keep in mind, that every new custom attribute increases the cardinality.", "additionalProperties": false, - "required": [ - "key" - ], + "required": ["key"], "properties": { "key": { "type": "string", @@ -1277,10 +1236,7 @@ "type": "string", "description": "The exporter protocol to use to export metrics. The supported exporters are 'http' and 'grpc'.", "default": "http", - "enum": [ - "http", - "grpc" - ] + "enum": ["http", "grpc"] }, "endpoint": { "type": "string", @@ -1302,16 +1258,10 @@ "temporality": { "type": "string", "description": "Temporality defines the window that an aggregation is calculated over.", - "enum": [ - "delta", - "cumulative" - ] + "enum": ["delta", "cumulative"] } }, - "required": [ - "exporter", - "endpoint" - ] + "required": ["exporter", "endpoint"] } }, "exclude_metrics": { @@ -1498,32 +1448,18 @@ "allow_origins": { "type": "array", "description": "The allowed origins. The default value is to allow all origins. The value can be a list of origins or the wildcard '*'.", - "default": [ - "*" - ], + "default": ["*"], "items": { "type": "string" } }, "allow_methods": { "type": "array", - "default": [ - "GET", - "POST", - "HEAD" - ], + "default": ["GET", "POST", "HEAD"], "description": "The allowed HTTP methods. The default value is to allow the methods 'GET', 'POST', and 'HEAD'.", "items": { "type": "string", - "enum": [ - "GET", - "POST", - "HEAD", - "PUT", - "DELETE", - "PATCH", - "OPTIONS" - ] + "enum": ["GET", "POST", "HEAD", "PUT", "DELETE", "PATCH", "OPTIONS"] } }, "allow_headers": { @@ -1653,14 +1589,7 @@ }, "log_level": { "type": "string", - "enum": [ - "debug", - "info", - "warn", - "error", - "panic", - "fatal" - ], + "enum": ["debug", "info", "warn", "error", "panic", "fatal"], "description": "The log level. The log level is used to control the verbosity of the logs. 
The default value is 'info'.", "default": "info" }, @@ -1911,23 +1840,17 @@ "enabled": { "type": "boolean", "description": "Determines whether cache control policy is enabled.", - "examples": [ - true - ] + "examples": [true] }, "value": { "type": "string", "description": "Global cache control value.", - "examples": [ - "max-age=180, public" - ] + "examples": ["max-age=180, public"] }, "subgraphs": { "type": "array", "description": "Subgraph-specific cache control settings.", - "required": [ - "name" - ], + "required": ["name"], "additionalProperties": false, "items": { "type": "object", @@ -1935,24 +1858,18 @@ "name": { "type": "string", "description": "Name of the subgraph.", - "examples": [ - "products" - ] + "examples": ["products"] }, "value": { "type": "string", "description": "Cache control value for the subgraph.", - "examples": [ - "max-age=60, public" - ] + "examples": ["max-age=60, public"] } } } } }, - "required": [ - "enabled" - ], + "required": ["enabled"], "additionalProperties": false }, "modules": { @@ -1999,11 +1916,7 @@ "symmetric_algorithm": { "type": "string", "description": "The symmetric algorithm used", - "enum": [ - "HS256", - "HS384", - "HS512" - ] + "enum": ["HS256", "HS384", "HS512"] }, "header_key_id": { "type": "string", @@ -2015,11 +1928,7 @@ "default": ["sig"], "items": { "type": "string", - "enum": [ - "sig", - "enc", - "" - ] + "enum": ["sig", "enc", ""] } }, "algorithms": { @@ -2089,56 +1998,36 @@ }, "oneOf": [ { - "required": [ - "url" - ], + "required": ["url"], "not": { "anyOf": [ { - "required": [ - "secret" - ] + "required": ["secret"] }, { - "required": [ - "symmetric_algorithm" - ] + "required": ["symmetric_algorithm"] }, { - "required": [ - "header_key_id" - ] + "required": ["header_key_id"] } ] } }, { - "required": [ - "secret", - "symmetric_algorithm", - "header_key_id" - ], + "required": ["secret", "symmetric_algorithm", "header_key_id"], "not": { "anyOf": [ { - "required": [ - "url" - ] + "required": ["url"] }, { - 
"required": [ - "algorithms" - ] + "required": ["algorithms"] }, { - "required": [ - "refresh_interval" - ] + "required": ["refresh_interval"] }, { - "required": [ - "refresh_unknown_kid" - ] + "required": ["refresh_unknown_kid"] } ] } @@ -2166,17 +2055,13 @@ "type": { "type": "string", "description": "The type of the source. The only currently supported type is 'header'.", - "enum": [ - "header" - ] + "enum": ["header"] }, "name": { "type": "string", "description": "The name of the header. The header is used to extract the token from the request.", "format": "http-header", - "examples": [ - "X-Authorization" - ] + "examples": ["X-Authorization"] }, "value_prefixes": { "type": "array", @@ -2186,10 +2071,7 @@ } } }, - "required": [ - "type", - "name" - ] + "required": ["type", "name"] } } } @@ -2225,9 +2107,7 @@ }, "strategy": { "type": "string", - "enum": [ - "simple" - ], + "enum": ["simple"], "description": "The strategy used to enforce the rate limit. The supported strategies are 'simple'." }, "simple_strategy": { @@ -2266,18 +2146,12 @@ "description": "Hide the rate limit stats from the response extension. If the value is true, the rate limit stats are not included in the response extension." } }, - "required": [ - "rate", - "burst", - "period" - ] + "required": ["rate", "burst", "period"] }, "storage": { "type": "object", "additionalProperties": false, - "required": [ - "urls" - ], + "required": ["urls"], "properties": { "cluster_enabled": { "type": "boolean", @@ -2399,9 +2273,7 @@ "description": "The ID of the storage provider to use for loading GraphQL operations. Only storage provider of type 'file_system' are supported. The provider must be configured in the storage_providers section." 
} }, - "required": [ - "provider_id" - ] + "required": ["provider_id"] }, "session": { "type": "object", @@ -2522,10 +2394,7 @@ "description": "Configuration used by the EDFS provider to connect to the NATS server.", "items": { "type": "object", - "required": [ - "id", - "url" - ], + "required": ["id", "url"], "additionalProperties": false, "properties": { "id": { @@ -2543,9 +2412,7 @@ "oneOf": [ { "type": "object", - "required": [ - "token" - ], + "required": ["token"], "additionalProperties": false, "properties": { "token": { @@ -2562,10 +2429,7 @@ "type": "object", "description": "Userinfo configuration for the NATS provider.", "additionalProperties": false, - "required": [ - "username", - "password" - ], + "required": ["username", "password"], "properties": { "username": { "type": "string", @@ -2595,10 +2459,7 @@ "items": { "type": "object", "additionalProperties": false, - "required": [ - "id", - "brokers" - ], + "required": ["id", "brokers"], "properties": { "id": { "type": "string", @@ -2630,18 +2491,13 @@ { "type": "object", "additionalProperties": false, - "required": [ - "sasl_plain" - ], + "required": ["sasl_plain"], "properties": { "sasl_plain": { "type": "object", "description": "Plain SASL Authentication configuration for the Kafka provider.", "additionalProperties": false, - "required": [ - "username", - "password" - ], + "required": ["username", "password"], "properties": { "username": { "type": "string", @@ -2658,19 +2514,13 @@ { "type": "object", "additionalProperties": false, - "required": [ - "sasl_scram" - ], + "required": ["sasl_scram"], "properties": { "sasl_scram": { "type": "object", "description": "SCRAM SASL Authentication configuration for the Kafka provider.", "additionalProperties": false, - "required": [ - "username", - "password", - "mechanism" - ], + "required": ["username", "password", "mechanism"], "properties": { "username": { "type": "string", @@ -2683,10 +2533,7 @@ "mechanism": { "type": "string", "description": "The mechanism 
for SCRAM SASL authentication.", - "enum": [ - "SCRAM-SHA-256", - "SCRAM-SHA-512" - ] + "enum": ["SCRAM-SHA-256", "SCRAM-SHA-512"] } } } @@ -2708,10 +2555,7 @@ "items": { "type": "object", "additionalProperties": false, - "required": [ - "id", - "urls" - ], + "required": ["id", "urls"], "properties": { "id": { "type": "string", @@ -3422,10 +3266,7 @@ }, "mode": { "type": "string", - "enum": [ - "wrapped", - "pass-through" - ], + "enum": ["wrapped", "pass-through"], "default": "wrapped", "description": "The mode of error propagation. The supported modes are 'wrapped' (default) and 'pass-through'. The 'wrapped' mode wraps the error in a custom error object to hide internals. The 'pass-through' mode returns the error as is from the Subgraph." }, @@ -3449,9 +3290,7 @@ "items": { "type": "string" }, - "default": [ - "code" - ], + "default": ["code"], "description": "The allowed extension fields. The allowed extension fields are used to specify which fields of the Subgraph errors are allowed to be propagated to the client." }, "allow_all_extension_fields": { @@ -3845,9 +3684,7 @@ "algorithm": { "type": "string", "description": "The algorithm used to calculate the retry interval. The supported algorithms are 'backoff_jitter'.", - "enum": [ - "backoff_jitter" - ] + "enum": ["backoff_jitter"] }, "max_attempts": { "type": "integer", @@ -3883,58 +3720,40 @@ "properties": { "op": { "type": "string", - "enum": [ - "propagate" - ], - "examples": [ - "propagate" - ], + "enum": ["propagate"], + "examples": ["propagate"], "description": "The operation to perform on the header. The supported operations are 'propagate'. The 'propagate' operation is used to propagate the header to the subgraphs." }, "matching": { "type": "string", - "examples": [ - "(?i)^X-Custom-.*" - ], + "examples": ["(?i)^X-Custom-.*"], "description": "The matching rule for the header. The matching rule is a regular expression that is used to match the header. Can't be used with 'named'." 
}, "negate_match": { "type": "boolean", - "examples": [ - "true" - ], + "examples": ["true"], "description": "If set to true, the result of the 'matching' regex will be inverted. This is useful for simulating negative lookahead behavior, which is not natively supported." }, "named": { "type": "string", - "examples": [ - "X-Test-Header" - ], + "examples": ["X-Test-Header"], "description": "The name of the header to match. Use the canonical version e.g. X-Test-Header. Can't be used with 'matching'." }, "rename": { "type": "string", - "examples": [ - "X-Rename-Test-Header" - ], + "examples": ["X-Rename-Test-Header"], "description": "Rename is used to rename the named or the matching headers. It can be used with either the named or the matching." }, "default": { "type": "string", - "examples": [ - "default-value" - ], + "examples": ["default-value"], "description": "The default value of the header in case it is not present in the request." } }, "dependentRequired": { - "negate_match": [ - "matching" - ] + "negate_match": ["matching"] }, - "required": [ - "op" - ] + "required": ["op"] }, "traffic_shaping_header_response_rule": { "type": "object", @@ -3943,71 +3762,46 @@ "properties": { "op": { "type": "string", - "enum": [ - "propagate" - ], - "examples": [ - "propagate" - ], + "enum": ["propagate"], + "examples": ["propagate"], "description": "The operation to perform on the header. The supported operations are 'propagate'. The 'propagate' operation is used to propagate the header to the subgraphs." }, "matching": { "type": "string", - "examples": [ - "(?i)^X-Custom-.*" - ], + "examples": ["(?i)^X-Custom-.*"], "description": "The matching rule for the header. The matching rule is a regular expression that is used to match the header. Can't be used with 'named'." }, "negate_match": { "type": "boolean", - "examples": [ - "true" - ], + "examples": ["true"], "description": "If set to true, the result of the 'matching' regex will be inverted. 
This is useful for simulating negative lookahead behavior, which is not natively supported." }, "named": { "type": "string", - "examples": [ - "X-Test-Header" - ], + "examples": ["X-Test-Header"], "description": "The name of the header to match. Use the canonical version e.g. X-Test-Header. Can't be used with 'matching'." }, "rename": { "type": "string", - "examples": [ - "X-Rename-Test-Header" - ], + "examples": ["X-Rename-Test-Header"], "description": "Rename is used to rename the named or the matching headers. It can be used with either the named or the matching." }, "default": { "type": "string", - "examples": [ - "default-value" - ], + "examples": ["default-value"], "description": "The default value of the header in case it is not present in the request." }, "algorithm": { "type": "string", - "enum": [ - "first_write", - "last_write", - "append" - ], - "examples": [ - "first_write" - ], + "enum": ["first_write", "last_write", "append"], + "examples": ["first_write"], "description": "The algorith, to use when multiple headers are present. The supported operations are '\"first_write\", \"last_write\", and \"append\". The 'first_write' retains the first value of a given header. The 'last_write' retains the last value of a given header. The 'append' appends all values of a given header." } }, "dependentRequired": { - "negate_match": [ - "matching" - ] + "negate_match": ["matching"] }, - "required": [ - "op", - "algorithm" - ] + "required": ["op", "algorithm"] }, "router_response_header_rule": { "type": "object", @@ -4038,16 +3832,12 @@ }, "name": { "type": "string", - "examples": [ - "X-API-Key" - ], + "examples": ["X-API-Key"], "description": "The name of the header to set." }, "value": { "type": "string", - "examples": [ - "My-Secret-Value" - ], + "examples": ["My-Secret-Value"], "description": "The value to set for the header. This can include environment variables." 
}, "expression": { @@ -4058,39 +3848,26 @@ "type": "object", "description": "DEPRECATED: Use expression instead. The configuration for the value from. The value from is used to extract a value from a request context and propagate it to subgraphs. This is currently only valid in requests", "additionalProperties": false, - "required": [ - "context_field" - ], + "required": ["context_field"], "properties": { "context_field": { "type": "string", "description": "The field name of the context from which to extract the value. The value is only extracted when a context is available otherwise the default value is used.", - "enum": [ - "operation_name" - ] + "enum": ["operation_name"] } } } }, - "required": [ - "op", - "name" - ], + "required": ["op", "name"], "oneOf": [ { - "required": [ - "value" - ] + "required": ["value"] }, { - "required": [ - "expression" - ] + "required": ["expression"] }, { - "required": [ - "value_from" - ] + "required": ["value_from"] } ] }, @@ -4101,9 +3878,7 @@ "type": "object", "description": "The configuration for custom fields. Custom attributes can be created from request headers or context fields. Not every context fields are available at all request life-cycle stages. If a value is a list, the value is JSON encoded for OTLP. For Prometheus, the values are exploded into multiple metrics with unique labels. 
Keep in mind, that every new custom attribute increases the cardinality.", "additionalProperties": false, - "required": [ - "key" - ], + "required": ["key"], "properties": { "key": { "type": "string", @@ -4156,4 +3931,4 @@ } } } -} \ No newline at end of file +} diff --git a/router/pkg/config/config_test.go b/router/pkg/config/config_test.go index 9bf81b8903..5a98f95988 100644 --- a/router/pkg/config/config_test.go +++ b/router/pkg/config/config_test.go @@ -1000,6 +1000,10 @@ func TestConfigMerging(t *testing.T) { ProviderID: "s3", ObjectPrefix: "ee", }, + Manifest: PQLManifestConfig{ + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, }, AutomaticPersistedQueries: AutomaticPersistedQueriesConfig{ Storage: AutomaticPersistedQueriesStorageConfig{ @@ -1680,3 +1684,108 @@ access_logs: } }) } + +func TestPQLManifestConfig(t *testing.T) { + t.Run("defaults", func(t *testing.T) { + t.Parallel() + + f := createTempFileFromFixture(t, ` +version: "1" + +graph: + token: "token" +`) + cfg, err := LoadConfig([]string{f}) + require.NoError(t, err) + + require.False(t, cfg.Config.PersistedOperationsConfig.Manifest.Enabled) + require.Equal(t, 10*time.Second, cfg.Config.PersistedOperationsConfig.Manifest.PollInterval) + require.Equal(t, 5*time.Second, cfg.Config.PersistedOperationsConfig.Manifest.PollJitter) + }) + + t.Run("yaml config", func(t *testing.T) { + t.Parallel() + + f := createTempFileFromFixture(t, ` +version: "1" + +graph: + token: "token" + +persisted_operations: + manifest: + enabled: true + poll_interval: 60s + poll_jitter: 15s +`) + cfg, err := LoadConfig([]string{f}) + require.NoError(t, err) + + require.True(t, cfg.Config.PersistedOperationsConfig.Manifest.Enabled) + require.Equal(t, 60*time.Second, cfg.Config.PersistedOperationsConfig.Manifest.PollInterval) + require.Equal(t, 15*time.Second, cfg.Config.PersistedOperationsConfig.Manifest.PollJitter) + }) + + t.Run("env variables", func(t *testing.T) { + 
t.Setenv("PERSISTED_OPERATIONS_MANIFEST_ENABLED", "true") + t.Setenv("PERSISTED_OPERATIONS_MANIFEST_POLL_INTERVAL", "45s") + t.Setenv("PERSISTED_OPERATIONS_MANIFEST_POLL_JITTER", "8s") + + f := createTempFileFromFixture(t, ` +version: "1" + +graph: + token: "token" +`) + cfg, err := LoadConfig([]string{f}) + require.NoError(t, err) + + require.True(t, cfg.Config.PersistedOperationsConfig.Manifest.Enabled) + require.Equal(t, 45*time.Second, cfg.Config.PersistedOperationsConfig.Manifest.PollInterval) + require.Equal(t, 8*time.Second, cfg.Config.PersistedOperationsConfig.Manifest.PollJitter) + }) + + t.Run("poll_interval below minimum rejected", func(t *testing.T) { + t.Parallel() + + f := createTempFileFromFixture(t, ` +version: "1" + +graph: + token: "token" + +persisted_operations: + manifest: + enabled: true + poll_interval: 5s +`) + _, err := LoadConfig([]string{f}) + + var js *jsonschema.ValidationError + require.ErrorAs(t, err, &js) + require.Equal(t, []string{"persisted_operations", "manifest", "poll_interval"}, js.Causes[0].InstanceLocation) + require.Equal(t, "at '/persisted_operations/manifest/poll_interval': duration must be greater or equal than 10s", js.Causes[0].Error()) + }) + + t.Run("poll_jitter below minimum rejected", func(t *testing.T) { + t.Parallel() + + f := createTempFileFromFixture(t, ` +version: "1" + +graph: + token: "token" + +persisted_operations: + manifest: + enabled: true + poll_jitter: 500ms +`) + _, err := LoadConfig([]string{f}) + + var js *jsonschema.ValidationError + require.ErrorAs(t, err, &js) + require.Equal(t, []string{"persisted_operations", "manifest", "poll_jitter"}, js.Causes[0].InstanceLocation) + require.Equal(t, "at '/persisted_operations/manifest/poll_jitter': duration must be greater or equal than 1s", js.Causes[0].Error()) + }) +} diff --git a/router/pkg/config/fixtures/full.yaml b/router/pkg/config/fixtures/full.yaml index fb93684dd5..0187e4ff8d 100644 --- a/router/pkg/config/fixtures/full.yaml +++ 
b/router/pkg/config/fixtures/full.yaml @@ -24,7 +24,7 @@ introspection: enabled: true secret: 'AN_EXAMPLE_PLACEHOLDER_SECRET_ONLY' json_log: true -log_service_name: "my-custom-router" +log_service_name: 'my-custom-router' shutdown_delay: 15s grace_period: 20s poll_interval: 10s @@ -199,7 +199,7 @@ telemetry: schema_usage: enabled: true include_operation_sha: true - sample_rate: 1.0 # Supports any rate: 1.0, 0.8, 0.5, 0.1, 0.01, etc. + sample_rate: 1.0 # Supports any rate: 1.0, 0.8, 0.5, 0.1, 0.01, etc. cache_control_policy: enabled: true @@ -483,6 +483,10 @@ persisted_operations: storage: provider_id: s3 object_prefix: '5ef73d80-cae4-4d0e-98a7-1e9fa922c1a4/92c25b45-a75b-4954-b8f6-6592a9b203eb/operations/foo' + manifest: + enabled: true + poll_interval: 30s + poll_jitter: 10s automatic_persisted_queries: enabled: true diff --git a/router/pkg/config/testdata/config_defaults.json b/router/pkg/config/testdata/config_defaults.json index c5cc0b6e40..955163b924 100644 --- a/router/pkg/config/testdata/config_defaults.json +++ b/router/pkg/config/testdata/config_defaults.json @@ -531,6 +531,11 @@ "Storage": { "ProviderID": "", "ObjectPrefix": "" + }, + "Manifest": { + "Enabled": false, + "PollInterval": 10000000000, + "PollJitter": 5000000000 } }, "AutomaticPersistedQueries": { diff --git a/router/pkg/config/testdata/config_full.json b/router/pkg/config/testdata/config_full.json index 98c86135c7..2595c1bf30 100644 --- a/router/pkg/config/testdata/config_full.json +++ b/router/pkg/config/testdata/config_full.json @@ -960,6 +960,11 @@ "Storage": { "ProviderID": "s3", "ObjectPrefix": "5ef73d80-cae4-4d0e-98a7-1e9fa922c1a4/92c25b45-a75b-4954-b8f6-6592a9b203eb/operations/foo" + }, + "Manifest": { + "Enabled": true, + "PollInterval": 30000000000, + "PollJitter": 10000000000 } }, "AutomaticPersistedQueries": { From 5def8fae7ee235071bdd7bf5aab58d7af6c74176 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Wed, 11 Mar 2026 20:41:42 +0100 Subject: [PATCH 02/31] feat: update persisted 
operations tests and improve manifest handling --- .../test/persisted-operations.test.ts | 4 +- router-tests/pql_manifest_test.go | 72 ++++++------------- .../persistedoperation/pqlmanifest/poller.go | 2 +- .../persistedoperation/pqlmanifest/store.go | 43 ++++------- 4 files changed, 40 insertions(+), 81 deletions(-) diff --git a/controlplane/test/persisted-operations.test.ts b/controlplane/test/persisted-operations.test.ts index acd3292b8c..3f042c76a9 100644 --- a/controlplane/test/persisted-operations.test.ts +++ b/controlplane/test/persisted-operations.test.ts @@ -595,7 +595,7 @@ describe('Persisted operations', (ctx) => { await setupFederatedGraph(fedGraphName, client); const queryA = `query { hello }`; - const queryB = `query { world }`; + const queryB = `query { __typename }`; const publishResp1 = await client.publishPersistedOperations({ fedGraphName, @@ -639,7 +639,7 @@ describe('Persisted operations', (ctx) => { await setupFederatedGraph(fedGraphName, client); const query1 = `query { hello }`; - const query2 = `query { world }`; + const query2 = `query { __typename }`; const op1Id = genID('op1'); const op2Id = genID('op2'); diff --git a/router-tests/pql_manifest_test.go b/router-tests/pql_manifest_test.go index 6d7d8d3f55..2bb2b11512 100644 --- a/router-tests/pql_manifest_test.go +++ b/router-tests/pql_manifest_test.go @@ -15,6 +15,23 @@ import ( "go.uber.org/zap/zapcore" ) +// getCDNRequests returns all recorded HTTP requests from the CDN test server. +// The CDN test server records every request path it receives. Calling GET on +// its base URL returns these as a JSON array of strings (e.g. "GET /org/graph/operations/..."). 
+func getCDNRequests(t *testing.T, cdnURL string) []string { + t.Helper() + resp, err := http.Get(cdnURL) + require.NoError(t, err) + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + var requests []string + err = json.Unmarshal(body, &requests) + require.NoError(t, err) + return requests +} + func TestPQLManifest(t *testing.T) { t.Parallel() @@ -91,19 +108,8 @@ func TestPQLManifest(t *testing.T) { require.Equal(t, expectedEmployeesBody, res.Body) } - // Check CDN request log - only manifest request, no individual operation requests - resp, err := http.Get(xEnv.CDN.URL) - require.NoError(t, err) - defer resp.Body.Close() - body, err := io.ReadAll(resp.Body) - require.NoError(t, err) - - var cdnRequests []string - err = json.Unmarshal(body, &cdnRequests) - require.NoError(t, err) - - // Should have manifest request(s) but no individual operation requests - for _, req := range cdnRequests { + // With manifest enabled, the router should never call CDN for individual operations + for _, req := range getCDNRequests(t, xEnv.CDN.URL) { require.False(t, strings.Contains(req, "/operations/my-client/"), "expected no individual operation CDN requests, but got: %s", req) } @@ -212,21 +218,9 @@ func TestPQLManifest(t *testing.T) { require.NoError(t, err) require.Equal(t, expectedEmployeesBody, res.Body) - // Verify CDN was hit for the individual operation - resp, err := http.Get(xEnv.CDN.URL) - require.NoError(t, err) - defer resp.Body.Close() - body, err := io.ReadAll(resp.Body) - require.NoError(t, err) - - var cdnRequests []string - err = json.Unmarshal(body, &cdnRequests) - require.NoError(t, err) - - // CDN should have been called for the individual operation, not the manifest hasOperationRequest := false hasManifestRequest := false - for _, req := range cdnRequests { + for _, req := range getCDNRequests(t, xEnv.CDN.URL) { if strings.Contains(req, "/operations/my-client/") { hasOperationRequest = true } @@ -269,20 +263,9 @@ func 
TestPQLManifest(t *testing.T) { require.NoError(t, err) require.Equal(t, persistedNotFoundResp, res.Body) - // Verify CDN was hit for individual operations, not manifest - resp, err := http.Get(xEnv.CDN.URL) - require.NoError(t, err) - defer resp.Body.Close() - body, err := io.ReadAll(resp.Body) - require.NoError(t, err) - - var cdnRequests []string - err = json.Unmarshal(body, &cdnRequests) - require.NoError(t, err) - hasOperationRequest := false hasManifestRequest := false - for _, req := range cdnRequests { + for _, req := range getCDNRequests(t, xEnv.CDN.URL) { if strings.Contains(req, "/operations/my-client/") { hasOperationRequest = true } @@ -324,19 +307,8 @@ func TestPQLManifest(t *testing.T) { logEntries := xEnv.Observer().FilterMessageSnippet("Unknown persisted operation found").All() require.Len(t, logEntries, 1) - // Verify CDN was used, not manifest - resp, err := http.Get(xEnv.CDN.URL) - require.NoError(t, err) - defer resp.Body.Close() - body, err := io.ReadAll(resp.Body) - require.NoError(t, err) - - var cdnRequests []string - err = json.Unmarshal(body, &cdnRequests) - require.NoError(t, err) - hasManifestRequest := false - for _, req := range cdnRequests { + for _, req := range getCDNRequests(t, xEnv.CDN.URL) { if strings.Contains(req, "/operations/manifest.json") { hasManifestRequest = true } diff --git a/router/internal/persistedoperation/pqlmanifest/poller.go b/router/internal/persistedoperation/pqlmanifest/poller.go index a35dd26bc3..7055c5542a 100644 --- a/router/internal/persistedoperation/pqlmanifest/poller.go +++ b/router/internal/persistedoperation/pqlmanifest/poller.go @@ -67,7 +67,7 @@ func (p *Poller) Poll(ctx context.Context) { if changed && manifest != nil { p.store.Load(manifest) - p.logger.Info("Updated PQL manifest", + p.logger.Debug("Updated PQL manifest", zap.String("revision", manifest.Revision), zap.String("previous_revision", currentRevision), zap.Int("operation_count", len(manifest.Operations)), diff --git 
a/router/internal/persistedoperation/pqlmanifest/store.go b/router/internal/persistedoperation/pqlmanifest/store.go index d330740f4a..b04985a8a5 100644 --- a/router/internal/persistedoperation/pqlmanifest/store.go +++ b/router/internal/persistedoperation/pqlmanifest/store.go @@ -1,7 +1,7 @@ package pqlmanifest import ( - "sync" + "sync/atomic" "go.uber.org/zap" ) @@ -14,8 +14,7 @@ type Manifest struct { } type Store struct { - mu sync.RWMutex - manifest *Manifest + manifest atomic.Pointer[Manifest] logger *zap.Logger } @@ -25,23 +24,19 @@ func NewStore(logger *zap.Logger) *Store { } } -// Load write-locks and swaps the manifest atomically. +// Load swaps the manifest atomically. func (s *Store) Load(manifest *Manifest) { - s.mu.Lock() - defer s.mu.Unlock() - s.manifest = manifest + s.manifest.Store(manifest) } -// LookupByHash read-locks and performs an O(1) map lookup by sha256 hash. +// LookupByHash performs an O(1) map lookup by sha256 hash. func (s *Store) LookupByHash(sha256Hash string) (body []byte, found bool) { - s.mu.RLock() - defer s.mu.RUnlock() - - if s.manifest == nil { + m := s.manifest.Load() + if m == nil { return nil, false } - op, ok := s.manifest.Operations[sha256Hash] + op, ok := m.Operations[sha256Hash] if !ok { return nil, false } @@ -51,31 +46,23 @@ func (s *Store) LookupByHash(sha256Hash string) (body []byte, found bool) { // IsLoaded returns whether a manifest has been loaded. func (s *Store) IsLoaded() bool { - s.mu.RLock() - defer s.mu.RUnlock() - return s.manifest != nil + return s.manifest.Load() != nil } // Revision returns the current manifest revision for polling. func (s *Store) Revision() string { - s.mu.RLock() - defer s.mu.RUnlock() - - if s.manifest == nil { + m := s.manifest.Load() + if m == nil { return "" } - - return s.manifest.Revision + return m.Revision } // OperationCount returns the number of operations in the manifest. 
func (s *Store) OperationCount() int { - s.mu.RLock() - defer s.mu.RUnlock() - - if s.manifest == nil { + m := s.manifest.Load() + if m == nil { return 0 } - - return len(s.manifest.Operations) + return len(m.Operations) } From 980f7a92ed728cab4132700010fe6116514c3fb8 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Wed, 11 Mar 2026 20:44:11 +0100 Subject: [PATCH 03/31] feat: enhance README with PQL manifest details and lookup order --- router/internal/persistedoperation/README.md | 41 ++++++++++++++------ 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/router/internal/persistedoperation/README.md b/router/internal/persistedoperation/README.md index 7760a97463..dcab23a0d0 100644 --- a/router/internal/persistedoperation/README.md +++ b/router/internal/persistedoperation/README.md @@ -1,17 +1,36 @@ # Persisted Operations -Persisted operations are stored queries, which can be executed just by providing the sha256hash of the operation to the router. This is useful for multiple purposes, including: -* large/frequently requested queries, which can be stored to avoid sending them over the network multiple times -* for security purposes, where a consumer can specify the specific operations which can be run, and the router can verify that the operation is one of the allowed ones +Persisted operations are stored queries, which can be executed just by providing the sha256hash of the operation to the router. This is useful for multiple purposes, including: -Specifically for those two purposes, we enable two different methods of storing persisted operations: -1. **Persisted Operation Files** - This operation, documented [here](https://cosmo-docs.wundergraph.com/router/persisted-operations), allows users to store persisted operations in files in a CDN/S3 bucket, which are then loaded by the router. This is both useful for storing large queries, as well as by reducing the router's attack surface by only allowing registered operations -2. 
**Automatic Persisted Queries** - This setting allows users to automatically cache queries that are sent, as long as they are sent together with their sha256hash. This is a useful performance optimizer, as it allows the router to cache queries that are frequently requested, without the need to manually store them in a file. +- large/frequently requested queries, which can be stored to avoid sending them over the network multiple times +- for security purposes, where a consumer can specify the specific operations which can be run, and the router can verify that the operation is one of the allowed ones -These two uses can exist in concert - users can save a number of particular operations in persisted operation files, and then use automatic persisted queries to cache the rest of the queries that are sent to the router. +Specifically for those purposes, we enable three different methods of storing persisted operations: + +1. **Persisted Operation Files** - This operation, documented [here](https://cosmo-docs.wundergraph.com/router/persisted-operations), allows users to store persisted operations in files in a CDN/S3 bucket, which are then loaded by the router individually per request. This is both useful for storing large queries, as well as by reducing the router's attack surface by only allowing registered operations. +2. **PQL Manifest** - When enabled, the router downloads a single JSON manifest containing all persisted operations at startup and polls for updates periodically. Operations are resolved entirely in-memory with zero per-request network overhead. When the manifest is enabled, it is authoritative — no CDN fallback occurs for individual operations. See the `pqlmanifest` subpackage. +3. **Automatic Persisted Queries** - This setting allows users to automatically cache queries that are sent, as long as they are sent together with their sha256hash. 
This is a useful performance optimizer, as it allows the router to cache queries that are frequently requested, without the need to manually store them in a file. + +These methods can exist in concert — for example, users can enable the PQL manifest for zero-latency lookups and use APQ to cache ad-hoc queries. + +## Lookup Order + +When a persisted operation request arrives, the client resolves it in this order: + +1. **APQ cache** — if APQ is enabled and the hash is cached, use it +2. **In-memory normalization cache** — if the operation was previously resolved and cached locally +3. **PQL manifest** — if a manifest is loaded, look up the hash in-memory. If found, return the body. If not found, the manifest is authoritative: the operation does not exist (no CDN fallback) +4. **CDN/S3/FS fallback** — only when the manifest is **not** enabled, fetch the individual operation file from CDN, S3, or the filesystem ## Flows -1. **Persisted Operations, no APQ** → In this scenario, the router will only execute queries that are stored in persisted operation files. If a query is not found in the persisted operation files, the router will return an error if a user tries calling a `persisted operation` with an unknown sha. After the query is planned, the router will cache the normalized query in the local persisted operation cache. -1. **APQ, No Persisted Operations** → In this scenario, if a `persisted_operation` request is sent, the router will first check if there is an APQ cached that matches. If a query is found, the router will execute the query. If a query is not found, the router will look if a query was submitted together with the persisted operation hash. If so, it will execute that query and save it in the cache for the future, and if not, the router will return an error. -1. **No APQ, No Persisted Operations** → If a persisted operation is sent, the router will return an error, as there are no persisted operations stored. 
Even if a query is sent, the router will still error because APQ isn't enabled. -1. **APQ and Persisted Operations** → In this scenario, the router will first check if the query was stored as an APQ. If it is, the router will execute the query. If it is not, the router will check the persistent query files. If the query is found, the router will execute the query. If the query is not found, the router will check if the query was sent together with the persisted operation hash. If it was, the router will execute the query and save it in the APQ cache for the future. If it was not, the router will return an error. + +1. **Persisted Operations (CDN), no APQ** → The router fetches individual operations from CDN/S3 on demand. If a query is not found, the router returns an error. After the query is planned, the router caches the normalized query in the local persisted operation cache. +1. **PQL Manifest, no APQ** → The router downloads the manifest at startup and polls for updates. All lookups are in-memory. Unknown hashes are rejected immediately without any network call. +1. **APQ, No Persisted Operations** → If a `persisted_operation` request is sent, the router checks the APQ cache first. If not found, it checks if a query body was sent with the request. If so, it executes and caches it. Otherwise, the router returns an error. +1. **No APQ, No Persisted Operations** → If a persisted operation is sent, the router returns an error, as there are no persisted operations stored. Even if a query is sent, the router will still error because APQ isn't enabled. +1. **APQ and Persisted Operations** → The router checks APQ first, then the PQL manifest or CDN (depending on config), then checks if a query body was attached. First match wins. + +## Enforcement Modes + +- **safelist** — when enabled, only operations found in persisted storage (manifest or CDN) are allowed. Ad-hoc queries are rejected with `PersistedQueryNotFound`. 
+- **log_unknown** — when enabled, ad-hoc queries that are not in persisted storage are logged but still allowed. Combined with safelist, unknown queries are both logged and rejected. From f2101bd47a72d65d2ce962c288af7a743d087f51 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Wed, 11 Mar 2026 21:17:32 +0100 Subject: [PATCH 04/31] feat: implement persisted query list (PQL) support in configuration and client --- cdn-server/cdn/src/index.ts | 3 +++ .../persisted-operation/publishPersistedOperations.ts | 3 +++ demo/pkg/subgraphs/subgraphs.go | 4 ++-- router/internal/persistedoperation/client.go | 1 - .../persistedoperation/operationstorage/cdn/client.go | 3 +++ 5 files changed, 11 insertions(+), 3 deletions(-) diff --git a/cdn-server/cdn/src/index.ts b/cdn-server/cdn/src/index.ts index 828d7cdca0..3dc2954843 100644 --- a/cdn-server/cdn/src/index.ts +++ b/cdn-server/cdn/src/index.ts @@ -106,6 +106,9 @@ const jwtMiddleware = (secret: string | ((c: Context) => string)) => { }; }; +// Deprecated: Individual persisted operation lookups via CDN are deprecated. +// The router now downloads all operations at once via the PQL manifest, avoiding +// per-request latency. This handler is kept for backward compatibility with older routers. const persistedOperation = (storage: BlobStorage) => { return async (c: Context) => { const organizationId = c.get('authenticatedOrganizationId'); diff --git a/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts b/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts index acf33b383b..b72aa6f029 100644 --- a/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts +++ b/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts @@ -203,6 +203,9 @@ export function publishPersistedOperations( version: 1, body: operation.contents, }; + // Deprecated: Uploading individual operations to blob storage is deprecated. 
+ // The router now downloads all operations at once via the PQL manifest, avoiding + // per-request CDN latency. This upload is kept for backward compatibility with older routers. try { await opts.blobStorage.putObject({ key: path, diff --git a/demo/pkg/subgraphs/subgraphs.go b/demo/pkg/subgraphs/subgraphs.go index 44764f14bd..3b5d294fca 100644 --- a/demo/pkg/subgraphs/subgraphs.go +++ b/demo/pkg/subgraphs/subgraphs.go @@ -212,7 +212,7 @@ func New(ctx context.Context, config *Config) (*Subgraphs, error) { natsPubSubByProviderID := map[string]natsPubsub.Adapter{} - defaultAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", false, datasource.ProviderOpts{ + defaultAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", datasource.ProviderOpts{ StreamMetricStore: rmetric.NewNoopStreamMetricStore(), }) if err != nil { @@ -223,7 +223,7 @@ func New(ctx context.Context, config *Config) (*Subgraphs, error) { } natsPubSubByProviderID["default"] = defaultAdapter - myNatsAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", false, datasource.ProviderOpts{ + myNatsAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", datasource.ProviderOpts{ StreamMetricStore: rmetric.NewNoopStreamMetricStore(), }) if err != nil { diff --git a/router/internal/persistedoperation/client.go b/router/internal/persistedoperation/client.go index 568a8d2130..098b9d16af 100644 --- a/router/internal/persistedoperation/client.go +++ b/router/internal/persistedoperation/client.go @@ -79,7 +79,6 @@ func (c *Client) PersistedOperation(ctx context.Context, clientName string, sha2 // PQL manifest check (local, no network) if c.pqlStore != nil && c.pqlStore.IsLoaded() { if body, found := c.pqlStore.LookupByHash(sha256Hash); found { - c.cache.Set(clientName, sha256Hash, body, 0) return body, false, nil } // Manifest is authoritative 
— operation not found diff --git a/router/internal/persistedoperation/operationstorage/cdn/client.go b/router/internal/persistedoperation/operationstorage/cdn/client.go index d16f7e6bf6..f747931828 100644 --- a/router/internal/persistedoperation/operationstorage/cdn/client.go +++ b/router/internal/persistedoperation/operationstorage/cdn/client.go @@ -24,6 +24,9 @@ type Options struct { Logger *zap.Logger } +// Deprecated: The CDN-based persisted operation client is deprecated. +// The router now downloads all operations at once via the PQL manifest, avoiding +// per-request CDN latency. This client is kept for backward compatibility. var _ persistedoperation.StorageClient = (*client)(nil) type client struct { From 1ac5f25b8480b3ae66ae1a4493b5da87297a0236 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Wed, 11 Mar 2026 21:32:06 +0100 Subject: [PATCH 05/31] feat: refactor persisted operation deletion and manifest generation logic --- .../deletePersistedOperation.ts | 57 ++++++++++--------- .../persisted-operation/generateManifest.ts | 4 ++ .../core/repositories/OperationsRepository.ts | 16 +++--- router/internal/persistedoperation/README.md | 8 ++- router/internal/persistedoperation/client.go | 2 +- .../persistedoperation/pqlmanifest/fetcher.go | 10 ++++ .../persistedoperation/pqlmanifest/poller.go | 6 ++ 7 files changed, 66 insertions(+), 37 deletions(-) diff --git a/controlplane/src/core/bufservices/persisted-operation/deletePersistedOperation.ts b/controlplane/src/core/bufservices/persisted-operation/deletePersistedOperation.ts index 446fffba01..27e18e87cc 100644 --- a/controlplane/src/core/bufservices/persisted-operation/deletePersistedOperation.ts +++ b/controlplane/src/core/bufservices/persisted-operation/deletePersistedOperation.ts @@ -75,32 +75,7 @@ export function deletePersistedOperation( }); try { - await Promise.all([ - opts.blobStorage.deleteObject({ - key: path, - }), - generateAndUploadManifest({ - db: opts.db, - federatedGraphId: federatedGraph.id, - 
organizationId: authContext.organizationId, - blobStorage: opts.blobStorage, - logger, - }), - ]); - - return { - response: { - code: EnumStatusCode.OK, - }, - operation: deletedOperation - ? { - id: deletedOperation.id, - operationId: deletedOperation.operationId, - clientName: deletedOperation.clientName, - operationNames: deletedOperation.operationNames, - } - : undefined, - }; + await opts.blobStorage.deleteObject({ key: path }); } catch (e) { const error = e instanceof Error ? e : new Error('Unknown error'); logger.error(error, `Could not delete operation for ${operation.operationId} at ${path}`); @@ -112,5 +87,35 @@ export function deletePersistedOperation( }, }; } + + try { + await generateAndUploadManifest({ + db: opts.db, + federatedGraphId: federatedGraph.id, + organizationId: authContext.organizationId, + blobStorage: opts.blobStorage, + logger, + }); + } catch (e) { + const error = e instanceof Error ? e : new Error('Unknown error'); + logger.error(error, `Failed to regenerate PQL manifest after deleting operation ${operation.operationId}`, { + federatedGraphId: federatedGraph.id, + organizationId: authContext.organizationId, + }); + } + + return { + response: { + code: EnumStatusCode.OK, + }, + operation: deletedOperation + ? 
{ + id: deletedOperation.id, + operationId: deletedOperation.operationId, + clientName: deletedOperation.clientName, + operationNames: deletedOperation.operationNames, + } + : undefined, + }; }); } diff --git a/controlplane/src/core/bufservices/persisted-operation/generateManifest.ts b/controlplane/src/core/bufservices/persisted-operation/generateManifest.ts index b737e3bc16..a9d831c92c 100644 --- a/controlplane/src/core/bufservices/persisted-operation/generateManifest.ts +++ b/controlplane/src/core/bufservices/persisted-operation/generateManifest.ts @@ -25,6 +25,10 @@ export async function generateAndUploadManifest(params: { const operationsRepo = new OperationsRepository(db, federatedGraphId); const allOperations = await operationsRepo.getAllPersistedOperationsForGraph(); + if (allOperations.length === 0) { + logger.warn({ federatedGraphId }, 'No persisted operations with content found for manifest generation'); + } + const operations: Record = {}; for (const op of allOperations) { operations[op.hash] = op.operationContent; diff --git a/controlplane/src/core/repositories/OperationsRepository.ts b/controlplane/src/core/repositories/OperationsRepository.ts index 14c48f8d28..fabe701d86 100644 --- a/controlplane/src/core/repositories/OperationsRepository.ts +++ b/controlplane/src/core/repositories/OperationsRepository.ts @@ -248,13 +248,15 @@ export class OperationsRepository { .innerJoin(federatedGraphClients, eq(federatedGraphClients.id, federatedGraphPersistedOperations.clientId)) .where(eq(federatedGraphPersistedOperations.federatedGraphId, this.federatedGraphId)); - return results.map((r) => ({ - hash: r.hash, - operationContent: r.operationContent ?? '', - operationId: r.operationId, - operationNames: r.operationNames ?? [], - clientName: r.clientName, - })); + return results + .filter((r) => r.operationContent != null) + .map((r) => ({ + hash: r.hash, + operationContent: r.operationContent!, + operationId: r.operationId, + operationNames: r.operationNames ?? 
[], + clientName: r.clientName, + })); } public async getRegisteredClients(): Promise { diff --git a/router/internal/persistedoperation/README.md b/router/internal/persistedoperation/README.md index dcab23a0d0..128136abcd 100644 --- a/router/internal/persistedoperation/README.md +++ b/router/internal/persistedoperation/README.md @@ -15,7 +15,7 @@ These methods can exist in concert — for example, users can enable the PQL man ## Lookup Order -When a persisted operation request arrives, the client resolves it in this order: +When a persisted operation request arrives, the router resolves it in this order: 1. **APQ cache** — if APQ is enabled and the hash is cached, use it 2. **In-memory normalization cache** — if the operation was previously resolved and cached locally @@ -24,11 +24,13 @@ When a persisted operation request arrives, the client resolves it in this order ## Flows +> **Hash validation prerequisite:** When a request includes both a query body and `extensions.persistedQuery.sha256Hash`, the router validates the body against the hash and rejects the request if they do not match — _before_ any APQ or persisted-operation lookup occurs. See `router/core/graphql_prehandler.go` (`handleOperation`). + 1. **Persisted Operations (CDN), no APQ** → The router fetches individual operations from CDN/S3 on demand. If a query is not found, the router returns an error. After the query is planned, the router caches the normalized query in the local persisted operation cache. 1. **PQL Manifest, no APQ** → The router downloads the manifest at startup and polls for updates. All lookups are in-memory. Unknown hashes are rejected immediately without any network call. -1. **APQ, No Persisted Operations** → If a `persisted_operation` request is sent, the router checks the APQ cache first. If not found, it checks if a query body was sent with the request. If so, it executes and caches it. Otherwise, the router returns an error. +1. 
**APQ, No Persisted Operations** → If a `persisted_operation` request is sent, the router checks the APQ cache first. If not found, it checks if a query body was sent with the request. If so, it validates the hash against the body, then executes and caches it. Otherwise, the router returns an error. 1. **No APQ, No Persisted Operations** → If a persisted operation is sent, the router returns an error, as there are no persisted operations stored. Even if a query is sent, the router will still error because APQ isn't enabled. -1. **APQ and Persisted Operations** → The router checks APQ first, then the PQL manifest or CDN (depending on config), then checks if a query body was attached. First match wins. +1. **APQ and Persisted Operations** → The router validates any included query body against the hash, then checks APQ first, then the PQL manifest or CDN (depending on config), then checks if a query body was attached. First match wins. ## Enforcement Modes diff --git a/router/internal/persistedoperation/client.go b/router/internal/persistedoperation/client.go index 098b9d16af..7b5fb3809a 100644 --- a/router/internal/persistedoperation/client.go +++ b/router/internal/persistedoperation/client.go @@ -82,7 +82,7 @@ func (c *Client) PersistedOperation(ctx context.Context, clientName string, sha2 return body, false, nil } // Manifest is authoritative — operation not found - if c.apqClient != nil { + if c.APQEnabled() { return nil, true, nil } return nil, false, &PersistentOperationNotFoundError{ diff --git a/router/internal/persistedoperation/pqlmanifest/fetcher.go b/router/internal/persistedoperation/pqlmanifest/fetcher.go index 3d26cede48..2710f105e2 100644 --- a/router/internal/persistedoperation/pqlmanifest/fetcher.go +++ b/router/internal/persistedoperation/pqlmanifest/fetcher.go @@ -140,5 +140,15 @@ func (f *Fetcher) Fetch(ctx context.Context, currentRevision string) (*Manifest, return nil, false, fmt.Errorf("could not unmarshal PQL manifest: %w", err) } + if 
manifest.Version < 1 { + return nil, false, fmt.Errorf("unsupported PQL manifest version %d", manifest.Version) + } + if manifest.Revision == "" { + return nil, false, errors.New("PQL manifest has empty revision") + } + if manifest.Operations == nil { + return nil, false, errors.New("PQL manifest has no operations field") + } + return &manifest, true, nil } diff --git a/router/internal/persistedoperation/pqlmanifest/poller.go b/router/internal/persistedoperation/pqlmanifest/poller.go index 7055c5542a..2d7e88e725 100644 --- a/router/internal/persistedoperation/pqlmanifest/poller.go +++ b/router/internal/persistedoperation/pqlmanifest/poller.go @@ -17,6 +17,12 @@ type Poller struct { } func NewPoller(fetcher *Fetcher, store *Store, pollInterval, pollJitter time.Duration, logger *zap.Logger) *Poller { + if pollJitter < 0 { + pollJitter = 0 + } + if logger == nil { + logger = zap.NewNop() + } return &Poller{ fetcher: fetcher, store: store, From 5623d04a61410b1c406141058efc27facfe78680 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Wed, 11 Mar 2026 21:43:00 +0100 Subject: [PATCH 06/31] feat: add error handling for PQL manifest regeneration in publishPersistedOperations --- .../publishPersistedOperations.ts | 22 +++++++++++++------ demo/pkg/subgraphs/subgraphs.go | 4 ++-- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts b/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts index b72aa6f029..da35a48f7b 100644 --- a/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts +++ b/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts @@ -267,13 +267,21 @@ export function publishPersistedOperations( await operationsRepo.updatePersistedOperations(clientId, userId, updatedOperations); - await generateAndUploadManifest({ - db: opts.db, - federatedGraphId: federatedGraph.id, - organizationId, - 
blobStorage: opts.blobStorage, - logger, - }); + try { + await generateAndUploadManifest({ + db: opts.db, + federatedGraphId: federatedGraph.id, + organizationId, + blobStorage: opts.blobStorage, + logger, + }); + } catch (e) { + const error = e instanceof Error ? e : new Error('Unknown error'); + logger.error(error, 'Failed to regenerate PQL manifest after publishing persisted operations', { + federatedGraphId: federatedGraph.id, + organizationId, + }); + } return { response: { diff --git a/demo/pkg/subgraphs/subgraphs.go b/demo/pkg/subgraphs/subgraphs.go index 3b5d294fca..44764f14bd 100644 --- a/demo/pkg/subgraphs/subgraphs.go +++ b/demo/pkg/subgraphs/subgraphs.go @@ -212,7 +212,7 @@ func New(ctx context.Context, config *Config) (*Subgraphs, error) { natsPubSubByProviderID := map[string]natsPubsub.Adapter{} - defaultAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", datasource.ProviderOpts{ + defaultAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", false, datasource.ProviderOpts{ StreamMetricStore: rmetric.NewNoopStreamMetricStore(), }) if err != nil { @@ -223,7 +223,7 @@ func New(ctx context.Context, config *Config) (*Subgraphs, error) { } natsPubSubByProviderID["default"] = defaultAdapter - myNatsAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", datasource.ProviderOpts{ + myNatsAdapter, err := natsPubsub.NewAdapter(ctx, zap.NewNop(), url, []nats.Option{}, "hostname", "test", false, datasource.ProviderOpts{ StreamMetricStore: rmetric.NewNoopStreamMetricStore(), }) if err != nil { From b4e0c0e2d6ad76c308eb903e504d84ef8cada7e7 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Wed, 11 Mar 2026 21:47:57 +0100 Subject: [PATCH 07/31] feat: update NewPoller to enforce minimum values for pollInterval and pollJitter --- router/internal/persistedoperation/pqlmanifest/poller.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 
deletions(-) diff --git a/router/internal/persistedoperation/pqlmanifest/poller.go b/router/internal/persistedoperation/pqlmanifest/poller.go index 2d7e88e725..6d55333a08 100644 --- a/router/internal/persistedoperation/pqlmanifest/poller.go +++ b/router/internal/persistedoperation/pqlmanifest/poller.go @@ -17,8 +17,11 @@ type Poller struct { } func NewPoller(fetcher *Fetcher, store *Store, pollInterval, pollJitter time.Duration, logger *zap.Logger) *Poller { - if pollJitter < 0 { - pollJitter = 0 + if pollJitter <= 0 { + pollJitter = 5 + } + if pollInterval <= 0 { + pollInterval = 10 * time.Second } if logger == nil { logger = zap.NewNop() From 8bbfd3f6e2abe535b31fa9e2f3ad3671535cb2d8 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Wed, 11 Mar 2026 22:09:04 +0100 Subject: [PATCH 08/31] fix: correct default value for pollJitter in NewPoller to use time.Duration --- router/internal/persistedoperation/pqlmanifest/poller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/router/internal/persistedoperation/pqlmanifest/poller.go b/router/internal/persistedoperation/pqlmanifest/poller.go index 6d55333a08..c84ca3e259 100644 --- a/router/internal/persistedoperation/pqlmanifest/poller.go +++ b/router/internal/persistedoperation/pqlmanifest/poller.go @@ -18,7 +18,7 @@ type Poller struct { func NewPoller(fetcher *Fetcher, store *Store, pollInterval, pollJitter time.Duration, logger *zap.Logger) *Poller { if pollJitter <= 0 { - pollJitter = 5 + pollJitter = 5 * time.Second } if pollInterval <= 0 { pollInterval = 10 * time.Second From b4c791bb7ac9e9da9ab13daee9c1a675b902d735 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Wed, 11 Mar 2026 22:42:36 +0100 Subject: [PATCH 09/31] feat: add truncation logic for manifest operations and corresponding test case --- .../persisted-operation/generateManifest.ts | 14 ++++- .../test/persisted-operations.test.ts | 58 +++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) diff --git 
a/controlplane/src/core/bufservices/persisted-operation/generateManifest.ts b/controlplane/src/core/bufservices/persisted-operation/generateManifest.ts index a9d831c92c..79d2526a2b 100644 --- a/controlplane/src/core/bufservices/persisted-operation/generateManifest.ts +++ b/controlplane/src/core/bufservices/persisted-operation/generateManifest.ts @@ -6,6 +6,8 @@ import type { BlobStorage } from '../../blobstorage/index.js'; import { OperationsRepository } from '../../repositories/OperationsRepository.js'; import { createManifestBlobStoragePath } from './utils.js'; +export const MAX_MANIFEST_OPERATIONS = 3000; + export interface PQLManifest { version: 1; revision: string; @@ -29,8 +31,18 @@ export async function generateAndUploadManifest(params: { logger.warn({ federatedGraphId }, 'No persisted operations with content found for manifest generation'); } + const truncated = allOperations.length > MAX_MANIFEST_OPERATIONS; + const includedOperations = truncated ? allOperations.slice(0, MAX_MANIFEST_OPERATIONS) : allOperations; + + if (truncated) { + logger.warn( + { federatedGraphId, organizationId, total: allOperations.length, included: MAX_MANIFEST_OPERATIONS }, + `Manifest truncated: found ${allOperations.length} operations, including only the first ${MAX_MANIFEST_OPERATIONS}`, + ); + } + const operations: Record = {}; - for (const op of allOperations) { + for (const op of includedOperations) { operations[op.hash] = op.operationContent; } diff --git a/controlplane/test/persisted-operations.test.ts b/controlplane/test/persisted-operations.test.ts index 3f042c76a9..891597eb80 100644 --- a/controlplane/test/persisted-operations.test.ts +++ b/controlplane/test/persisted-operations.test.ts @@ -1,7 +1,11 @@ +import crypto from 'node:crypto'; import { EnumStatusCode } from '@wundergraph/cosmo-connect/dist/common/common_pb'; import { joinLabel } from '@wundergraph/cosmo-shared'; import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, test, vi, type Mock } from 
'vitest'; import { ClickHouseClient } from '../src/core/clickhouse/index.js'; +import { MAX_MANIFEST_OPERATIONS } from '../src/core/bufservices/persisted-operation/generateManifest.js'; +import { FederatedGraphRepository } from '../src/core/repositories/FederatedGraphRepository.js'; +import { OperationsRepository } from '../src/core/repositories/OperationsRepository.js'; import { afterAllSetup, beforeAllSetup, @@ -724,6 +728,60 @@ describe('Persisted operations', (ctx) => { // Same operations should produce the same revision expect(manifest2.revision).toBe(manifest1.revision); }); + + test('Should truncate the manifest to the max operation limit', async (testContext) => { + const { client, server, blobStorage, users } = await SetupTest({ + dbname, + chClient, + }); + testContext.onTestFinished(() => server.close()); + + const fedGraphName = genID('fedGraph'); + await setupFederatedGraph(fedGraphName, client); + + const user = users.adminAliceCompanyA; + const db = server.db; + const logger = server.log; + + // Resolve the federated graph ID. + const fedGraphRepo = new FederatedGraphRepository(logger, db, user.organizationId); + const fedGraph = await fedGraphRepo.byName(fedGraphName, 'default'); + expect(fedGraph).toBeDefined(); + + // Seed operations directly in the DB to avoid hitting the per-request + // limit of 100 repeatedly. + const opsRepo = new OperationsRepository(db, fedGraph!.id); + const clientId = await opsRepo.registerClient('test-client', user.userId); + + const seedOps = Array.from({ length: MAX_MANIFEST_OPERATIONS + 1 }, (_, i) => ({ + operationId: `seed-op-${i}`, + hash: crypto.createHash('sha256').update(`seed-op-${i}`).digest('hex'), + filePath: `seed-op-${i}.graphql`, + contents: `query { hello }`, + operationNames: [`SeedOp${i}`], + })); + await opsRepo.updatePersistedOperations(clientId, user.userId, seedOps); + + // Publish one operation via the API to trigger manifest generation. 
+ const resp = await client.publishPersistedOperations({ + fedGraphName, + namespace: 'default', + clientName: 'test-client', + operations: [{ id: genID('trigger'), contents: `query { hello }` }], + }); + expect(resp.response?.code).toBe(EnumStatusCode.OK); + + // The manifest should be truncated to MAX_MANIFEST_OPERATIONS. + const storageKeys = blobStorage.keys(); + const manifestKey = storageKeys.find((key) => key.endsWith('/operations/manifest.json')); + expect(manifestKey).toBeDefined(); + + const blobObject = await blobStorage.getObject({ key: manifestKey! }); + const text = await new Response(blobObject.stream).text(); + const manifest = JSON.parse(text); + + expect(Object.keys(manifest.operations).length).toBe(MAX_MANIFEST_OPERATIONS); + }); }); describe('check', () => { From f4f92218a8172516892e78ae550f48a18ddc83c9 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Thu, 12 Mar 2026 19:41:51 +0100 Subject: [PATCH 10/31] feat(persisted-query): implement persisted query list and cache invalidation logic --- router-tests/pql_manifest_test.go | 110 ++++++++++++++++++ router/core/operation_processor.go | 4 + router/internal/persistedoperation/client.go | 13 ++- .../pqlmanifest/store_test.go | 42 +++++++ 4 files changed, 167 insertions(+), 2 deletions(-) create mode 100644 router/internal/persistedoperation/pqlmanifest/store_test.go diff --git a/router-tests/pql_manifest_test.go b/router-tests/pql_manifest_test.go index 2bb2b11512..cdc1da71c0 100644 --- a/router-tests/pql_manifest_test.go +++ b/router-tests/pql_manifest_test.go @@ -4,10 +4,13 @@ import ( "encoding/json" "io" "net/http" + "net/http/httptest" "strings" + "sync/atomic" "testing" "time" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/wundergraph/cosmo/router-tests/testenv" "github.com/wundergraph/cosmo/router/core" @@ -353,4 +356,111 @@ func TestPQLManifest(t *testing.T) { require.Equal(t, nonPersistedQuery, requestContext["query"]) }) }) + + t.Run("manifest update 
invalidates normalization cache", func(t *testing.T) { + t.Parallel() + + employeesHash := "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f" + employeesQuery := "query Employees {\n employees {\n id\n }\n}" + + // manifestV1 has the Employees operation + manifestV1, _ := json.Marshal(map[string]interface{}{ + "version": 1, + "revision": "rev-v1", + "generatedAt": "2024-01-01T00:00:00Z", + "operations": map[string]string{ + employeesHash: employeesQuery, + }, + }) + // manifestV2 removes the Employees operation + manifestV2, _ := json.Marshal(map[string]interface{}{ + "version": 1, + "revision": "rev-v2", + "generatedAt": "2024-01-02T00:00:00Z", + "operations": map[string]string{}, + }) + + var currentManifest atomic.Value + currentManifest.Store(manifestV1) + + cdnServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if strings.HasSuffix(r.URL.Path, "/operations/manifest.json") { + // Read the request body to check revision + body, _ := io.ReadAll(r.Body) + var reqBody struct { + Revision string `json:"revision"` + } + _ = json.Unmarshal(body, &reqBody) + + manifest := currentManifest.Load().([]byte) + + // Parse manifest to get its revision + var m struct { + Revision string `json:"revision"` + } + _ = json.Unmarshal(manifest, &m) + + if reqBody.Revision == m.Revision { + w.WriteHeader(http.StatusNotModified) + return + } + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write(manifest) + return + } + + // For non-manifest requests, return 404 + w.WriteHeader(http.StatusNotFound) + })) + defer cdnServer.Close() + + testenv.Run(t, &testenv.Config{ + CdnSever: cdnServer, + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 100 * time.Millisecond, + PollJitter: 5 * time.Millisecond, + }, + }), + }, + }, func(t *testing.T, xEnv 
*testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + // 1. Operation succeeds with manifest v1 + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "` + employeesHash + `"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + + // 2. Make the same request again to populate the normalization cache + res, err = xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "` + employeesHash + `"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + + // 3. Swap to manifest v2 (which removes the operation) + currentManifest.Store(manifestV2) + + // 4. Wait for poller to pick up the new manifest and cache to be invalidated + require.EventuallyWithT(t, func(ct *assert.CollectT) { + res = xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "` + employeesHash + `"}}`), + Header: header, + }) + assert.Equal(ct, persistedNotFoundResp, res.Body) + }, 5*time.Second, 100*time.Millisecond) + }) + }) } diff --git a/router/core/operation_processor.go b/router/core/operation_processor.go index 68d71af2ed..e306ff7bdd 100644 --- a/router/core/operation_processor.go +++ b/router/core/operation_processor.go @@ -1240,6 +1240,10 @@ func (o *OperationKit) generatePersistedOperationCacheKey(clientName string, ski _, _ = o.kit.keyGen.WriteString(o.parsedOperation.Request.OperationName) } _, _ = o.kit.keyGen.WriteString(clientName) + // Include manifest revision so cache entries naturally invalidate when the manifest changes + if o.operationProcessor.persistedOperationClient != nil { + _, _ = 
o.kit.keyGen.WriteString(o.operationProcessor.persistedOperationClient.ManifestRevision()) + } o.writeSkipIncludeCacheKeyToKeyGen(skipIncludeVariableNames) sum := o.kit.keyGen.Sum64() o.kit.keyGen.Reset() diff --git a/router/internal/persistedoperation/client.go b/router/internal/persistedoperation/client.go index 7b5fb3809a..d70d9ac42d 100644 --- a/router/internal/persistedoperation/client.go +++ b/router/internal/persistedoperation/client.go @@ -65,7 +65,7 @@ func NewClient(opts *Options) (*Client, error) { } func (c *Client) PersistedOperation(ctx context.Context, clientName string, sha256Hash string) ([]byte, bool, error) { - if c.apqClient != nil && c.apqClient.Enabled() { + if c.APQEnabled() { resp, apqErr := c.apqClient.PersistedOperation(ctx, clientName, sha256Hash) if len(resp) > 0 || apqErr != nil { return resp, true, apqErr @@ -92,7 +92,7 @@ func (c *Client) PersistedOperation(ctx context.Context, clientName string, sha2 if c.providerClient == nil { // This can happen if we are using APQ client, without any persisted operation client. Otherwise, we should have a provider client and shouldn't reach here. - return nil, c.apqClient != nil, nil + return nil, c.APQEnabled(), nil } var ( @@ -125,6 +125,15 @@ func (c *Client) APQEnabled() bool { return c.apqClient != nil && c.apqClient.Enabled() } +// ManifestRevision returns the current PQL manifest revision, or "" if no manifest is loaded. +// Used to include in cache keys so entries naturally invalidate when the manifest changes. 
+func (c *Client) ManifestRevision() string { + if c.pqlStore == nil { + return "" + } + return c.pqlStore.Revision() +} + func (c *Client) Close() { if c.providerClient != nil { c.providerClient.Close() diff --git a/router/internal/persistedoperation/pqlmanifest/store_test.go b/router/internal/persistedoperation/pqlmanifest/store_test.go new file mode 100644 index 0000000000..15002cc9d2 --- /dev/null +++ b/router/internal/persistedoperation/pqlmanifest/store_test.go @@ -0,0 +1,42 @@ +package pqlmanifest + +import ( + "testing" + + "github.com/stretchr/testify/require" + "go.uber.org/zap" +) + +func TestStore(t *testing.T) { + t.Run("Load and LookupByHash", func(t *testing.T) { + store := NewStore(zap.NewNop()) + + store.Load(&Manifest{ + Version: 1, + Revision: "rev-1", + Operations: map[string]string{"abc": "query { a }"}, + }) + + body, found := store.LookupByHash("abc") + require.True(t, found) + require.Equal(t, "query { a }", string(body)) + require.Equal(t, "rev-1", store.Revision()) + }) + + t.Run("Revision changes on Load", func(t *testing.T) { + store := NewStore(zap.NewNop()) + + store.Load(&Manifest{Version: 1, Revision: "rev-1", Operations: map[string]string{"abc": "query { a }"}}) + require.Equal(t, "rev-1", store.Revision()) + + store.Load(&Manifest{Version: 1, Revision: "rev-2", Operations: map[string]string{"def": "query { b }"}}) + require.Equal(t, "rev-2", store.Revision()) + + // Old operation gone, new one present + _, found := store.LookupByHash("abc") + require.False(t, found) + body, found := store.LookupByHash("def") + require.True(t, found) + require.Equal(t, "query { b }", string(body)) + }) +} From ebd9e29588a1192d3ad7c41ad194e49879b4bbee Mon Sep 17 00:00:00 2001 From: StarpTech Date: Thu, 12 Mar 2026 22:11:02 +0100 Subject: [PATCH 11/31] fix(telemetry): remove sample_rate comment for clarity --- router/pkg/config/fixtures/full.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/router/pkg/config/fixtures/full.yaml 
b/router/pkg/config/fixtures/full.yaml index 0187e4ff8d..c7fcb14e10 100644 --- a/router/pkg/config/fixtures/full.yaml +++ b/router/pkg/config/fixtures/full.yaml @@ -199,7 +199,6 @@ telemetry: schema_usage: enabled: true include_operation_sha: true - sample_rate: 1.0 # Supports any rate: 1.0, 0.8, 0.5, 0.1, 0.01, etc. cache_control_policy: enabled: true From 077d34fa6cedca4a6f786014c1af683421aacbcd Mon Sep 17 00:00:00 2001 From: JivusAyrus Date: Mon, 23 Mar 2026 19:50:39 +0530 Subject: [PATCH 12/31] refactor: move generateAndUploadManifest to OperationsRepository and remove unused file --- .../deletePersistedOperation.ts | 5 +- .../persisted-operation/generateManifest.ts | 77 ------------------- .../publishPersistedOperations.ts | 5 +- .../core/repositories/OperationsRepository.ts | 71 +++++++++++++++++ .../test/persisted-operations.test.ts | 2 +- 5 files changed, 74 insertions(+), 86 deletions(-) delete mode 100644 controlplane/src/core/bufservices/persisted-operation/generateManifest.ts diff --git a/controlplane/src/core/bufservices/persisted-operation/deletePersistedOperation.ts b/controlplane/src/core/bufservices/persisted-operation/deletePersistedOperation.ts index 27e18e87cc..59baf9e45b 100644 --- a/controlplane/src/core/bufservices/persisted-operation/deletePersistedOperation.ts +++ b/controlplane/src/core/bufservices/persisted-operation/deletePersistedOperation.ts @@ -10,7 +10,6 @@ import { UnauthorizedError } from '../../errors/errors.js'; import { OperationsRepository } from '../../repositories/OperationsRepository.js'; import type { RouterOptions } from '../../routes.js'; import { enrichLogger, getLogger, handleError } from '../../util.js'; -import { generateAndUploadManifest } from './generateManifest.js'; import { createBlobStoragePath } from './utils.js'; export function deletePersistedOperation( @@ -89,9 +88,7 @@ export function deletePersistedOperation( } try { - await generateAndUploadManifest({ - db: opts.db, - federatedGraphId: federatedGraph.id, 
+ await operationsRepo.generateAndUploadManifest({ organizationId: authContext.organizationId, blobStorage: opts.blobStorage, logger, diff --git a/controlplane/src/core/bufservices/persisted-operation/generateManifest.ts b/controlplane/src/core/bufservices/persisted-operation/generateManifest.ts deleted file mode 100644 index 79d2526a2b..0000000000 --- a/controlplane/src/core/bufservices/persisted-operation/generateManifest.ts +++ /dev/null @@ -1,77 +0,0 @@ -import crypto from 'node:crypto'; -import { PostgresJsDatabase } from 'drizzle-orm/postgres-js'; -import { FastifyBaseLogger } from 'fastify'; -import * as schema from '../../../db/schema.js'; -import type { BlobStorage } from '../../blobstorage/index.js'; -import { OperationsRepository } from '../../repositories/OperationsRepository.js'; -import { createManifestBlobStoragePath } from './utils.js'; - -export const MAX_MANIFEST_OPERATIONS = 3000; - -export interface PQLManifest { - version: 1; - revision: string; - generatedAt: string; - operations: Record; // sha256 hash -> operation body -} - -export async function generateAndUploadManifest(params: { - db: PostgresJsDatabase; - federatedGraphId: string; - organizationId: string; - blobStorage: BlobStorage; - logger: FastifyBaseLogger; -}): Promise<{ revision: string; operationCount: number }> { - const { db, federatedGraphId, organizationId, blobStorage, logger } = params; - - const operationsRepo = new OperationsRepository(db, federatedGraphId); - const allOperations = await operationsRepo.getAllPersistedOperationsForGraph(); - - if (allOperations.length === 0) { - logger.warn({ federatedGraphId }, 'No persisted operations with content found for manifest generation'); - } - - const truncated = allOperations.length > MAX_MANIFEST_OPERATIONS; - const includedOperations = truncated ? 
allOperations.slice(0, MAX_MANIFEST_OPERATIONS) : allOperations; - - if (truncated) { - logger.warn( - { federatedGraphId, organizationId, total: allOperations.length, included: MAX_MANIFEST_OPERATIONS }, - `Manifest truncated: found ${allOperations.length} operations, including only the first ${MAX_MANIFEST_OPERATIONS}`, - ); - } - - const operations: Record = {}; - for (const op of includedOperations) { - operations[op.hash] = op.operationContent; - } - - // Compute revision as SHA256 of the deterministic JSON serialization (sorted keys) - const sortedKeys = Object.keys(operations).sort(); - const sortedOperations: Record = {}; - for (const key of sortedKeys) { - sortedOperations[key] = operations[key]; - } - const serialized = JSON.stringify(sortedOperations); - const revision = crypto.createHash('sha256').update(serialized).digest('hex'); - - const manifest: PQLManifest = { - version: 1, - revision, - generatedAt: new Date().toISOString(), - operations: sortedOperations, - }; - - const path = createManifestBlobStoragePath({ organizationId, fedGraphId: federatedGraphId }); - - await blobStorage.putObject({ - key: path, - body: Buffer.from(JSON.stringify(manifest), 'utf8'), - contentType: 'application/json; charset=utf-8', - metadata: { version: revision }, - }); - - logger.debug({ revision, operationCount: allOperations.length, path }, 'PQL manifest generated and uploaded'); - - return { revision, operationCount: allOperations.length }; -} diff --git a/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts b/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts index da35a48f7b..a5794160cd 100644 --- a/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts +++ b/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts @@ -18,7 +18,6 @@ import { OperationsRepository } from '../../repositories/OperationsRepository.js import type { RouterOptions } from 
'../../routes.js'; import { enrichLogger, extractOperationNames, getLogger, handleError } from '../../util.js'; import { UnauthorizedError } from '../../errors/errors.js'; -import { generateAndUploadManifest } from './generateManifest.js'; import { createBlobStoragePath } from './utils.js'; const MAX_PERSISTED_OPERATIONS = 100; @@ -268,9 +267,7 @@ export function publishPersistedOperations( await operationsRepo.updatePersistedOperations(clientId, userId, updatedOperations); try { - await generateAndUploadManifest({ - db: opts.db, - federatedGraphId: federatedGraph.id, + await operationsRepo.generateAndUploadManifest({ organizationId, blobStorage: opts.blobStorage, logger, diff --git a/controlplane/src/core/repositories/OperationsRepository.ts b/controlplane/src/core/repositories/OperationsRepository.ts index fabe701d86..6c13a2ff9f 100644 --- a/controlplane/src/core/repositories/OperationsRepository.ts +++ b/controlplane/src/core/repositories/OperationsRepository.ts @@ -1,10 +1,14 @@ +import crypto from 'node:crypto'; import { OverrideChange } from '@wundergraph/cosmo-connect/dist/platform/v1/platform_pb'; import { aliasedTable, and, asc, desc, eq, isNull, sql } from 'drizzle-orm'; import { PostgresJsDatabase } from 'drizzle-orm/postgres-js'; import { PlainMessage } from '@bufbuild/protobuf'; +import { FastifyBaseLogger } from 'fastify'; import { DBSchemaChangeType } from '../../db/models.js'; import * as schema from '../../db/schema.js'; import { federatedGraphClients, federatedGraphPersistedOperations, users } from '../../db/schema.js'; +import type { BlobStorage } from '../blobstorage/index.js'; +import { createManifestBlobStoragePath } from '../bufservices/persisted-operation/utils.js'; import { ClientDTO, PersistedOperationDTO, @@ -15,6 +19,15 @@ import { } from '../../types/index.js'; import { SchemaCheckRepository } from './SchemaCheckRepository.js'; +export const MAX_MANIFEST_OPERATIONS = 3000; + +export interface PQLManifest { + version: 1; + revision: 
string; + generatedAt: string; + operations: Record; // sha256 hash -> operation body +} + type ChangeOverride = IgnoreAllOverride & { changeType: DBSchemaChangeType; path: string | null; @@ -532,6 +545,64 @@ export class OperationsRepository { .orderBy(({ name, hash }) => [asc(name), asc(hash)]); } + public async generateAndUploadManifest(params: { + organizationId: string; + blobStorage: BlobStorage; + logger: FastifyBaseLogger; + }): Promise<{ revision: string; operationCount: number }> { + const { organizationId, blobStorage, logger } = params; + + const allOperations = await this.getAllPersistedOperationsForGraph(); + + if (allOperations.length === 0) { + logger.warn({ federatedGraphId: this.federatedGraphId }, 'No persisted operations with content found for manifest generation'); + } + + const truncated = allOperations.length > MAX_MANIFEST_OPERATIONS; + const includedOperations = truncated ? allOperations.slice(0, MAX_MANIFEST_OPERATIONS) : allOperations; + + if (truncated) { + logger.warn( + { federatedGraphId: this.federatedGraphId, organizationId, total: allOperations.length, included: MAX_MANIFEST_OPERATIONS }, + `Manifest truncated: found ${allOperations.length} operations, including only the first ${MAX_MANIFEST_OPERATIONS}`, + ); + } + + const operations: Record = {}; + for (const op of includedOperations) { + operations[op.hash] = op.operationContent; + } + + // Compute revision as SHA256 of the deterministic JSON serialization (sorted keys) + const sortedKeys = Object.keys(operations).sort(); + const sortedOperations: Record = {}; + for (const key of sortedKeys) { + sortedOperations[key] = operations[key]; + } + const serialized = JSON.stringify(sortedOperations); + const revision = crypto.createHash('sha256').update(serialized).digest('hex'); + + const manifest: PQLManifest = { + version: 1, + revision, + generatedAt: new Date().toISOString(), + operations: sortedOperations, + }; + + const path = createManifestBlobStoragePath({ organizationId, 
fedGraphId: this.federatedGraphId }); + + await blobStorage.putObject({ + key: path, + body: Buffer.from(JSON.stringify(manifest), 'utf8'), + contentType: 'application/json; charset=utf-8', + metadata: { version: revision }, + }); + + logger.debug({ revision, operationCount: allOperations.length, path }, 'PQL manifest generated and uploaded'); + + return { revision, operationCount: allOperations.length }; + } + private static createPersistedOperationDTO({ id, operationId, diff --git a/controlplane/test/persisted-operations.test.ts b/controlplane/test/persisted-operations.test.ts index 891597eb80..3aa8614392 100644 --- a/controlplane/test/persisted-operations.test.ts +++ b/controlplane/test/persisted-operations.test.ts @@ -3,7 +3,7 @@ import { EnumStatusCode } from '@wundergraph/cosmo-connect/dist/common/common_pb import { joinLabel } from '@wundergraph/cosmo-shared'; import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, test, vi, type Mock } from 'vitest'; import { ClickHouseClient } from '../src/core/clickhouse/index.js'; -import { MAX_MANIFEST_OPERATIONS } from '../src/core/bufservices/persisted-operation/generateManifest.js'; +import { MAX_MANIFEST_OPERATIONS } from '../src/core/repositories/OperationsRepository.js'; import { FederatedGraphRepository } from '../src/core/repositories/FederatedGraphRepository.js'; import { OperationsRepository } from '../src/core/repositories/OperationsRepository.js'; import { From b4bc77214d27bdc9d7090ffecd617e71c0b9b9e8 Mon Sep 17 00:00:00 2001 From: JivusAyrus Date: Tue, 24 Mar 2026 00:06:48 +0530 Subject: [PATCH 13/31] refactor: remove Cache-Control header from response in persisted operations --- cdn-server/cdn/src/index.ts | 1 - cdn-server/cdn/test/cdn.test.ts | 1 - controlplane/test/persisted-operations.test.ts | 3 +-- router/internal/persistedoperation/client.go | 3 ++- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cdn-server/cdn/src/index.ts b/cdn-server/cdn/src/index.ts index 
3dc2954843..8f9c59e81d 100644 --- a/cdn-server/cdn/src/index.ts +++ b/cdn-server/cdn/src/index.ts @@ -309,7 +309,6 @@ const persistedOperationsManifest = (storage: BlobStorage) => { } c.header('Content-Type', 'application/json; charset=UTF-8'); - c.header('Cache-Control', 'no-cache, no-store, must-revalidate'); return stream(c, async (stream) => { await stream.pipe(blobObject.stream); diff --git a/cdn-server/cdn/test/cdn.test.ts b/cdn-server/cdn/test/cdn.test.ts index 2e804061b8..ea8bd53686 100644 --- a/cdn-server/cdn/test/cdn.test.ts +++ b/cdn-server/cdn/test/cdn.test.ts @@ -644,7 +644,6 @@ describe('CDN handlers', () => { }); expect(res.status).toBe(200); expect(res.headers.get('Content-Type')).toBe('application/json; charset=UTF-8'); - expect(res.headers.get('Cache-Control')).toBe('no-cache, no-store, must-revalidate'); expect(await res.text()).toBe(manifestContents); }); diff --git a/controlplane/test/persisted-operations.test.ts b/controlplane/test/persisted-operations.test.ts index 3aa8614392..7a4ca3ca49 100644 --- a/controlplane/test/persisted-operations.test.ts +++ b/controlplane/test/persisted-operations.test.ts @@ -3,9 +3,8 @@ import { EnumStatusCode } from '@wundergraph/cosmo-connect/dist/common/common_pb import { joinLabel } from '@wundergraph/cosmo-shared'; import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, test, vi, type Mock } from 'vitest'; import { ClickHouseClient } from '../src/core/clickhouse/index.js'; -import { MAX_MANIFEST_OPERATIONS } from '../src/core/repositories/OperationsRepository.js'; import { FederatedGraphRepository } from '../src/core/repositories/FederatedGraphRepository.js'; -import { OperationsRepository } from '../src/core/repositories/OperationsRepository.js'; +import { MAX_MANIFEST_OPERATIONS, OperationsRepository } from '../src/core/repositories/OperationsRepository.js'; import { afterAllSetup, beforeAllSetup, diff --git a/router/internal/persistedoperation/client.go 
b/router/internal/persistedoperation/client.go index d70d9ac42d..9b7dd82345 100644 --- a/router/internal/persistedoperation/client.go +++ b/router/internal/persistedoperation/client.go @@ -91,7 +91,8 @@ func (c *Client) PersistedOperation(ctx context.Context, clientName string, sha2 } if c.providerClient == nil { - // This can happen if we are using APQ client, without any persisted operation client. Otherwise, we should have a provider client and shouldn't reach here. + // This can happen if we are using APQ client without any persisted operation client, + // or if the PQL manifest is enabled but hasn't loaded yet (e.g. initial fetch failed). return nil, c.APQEnabled(), nil } From 5cb5ec970206391075072d595fd73d1ed61384e3 Mon Sep 17 00:00:00 2001 From: JivusAyrus Date: Tue, 24 Mar 2026 13:07:28 +0530 Subject: [PATCH 14/31] feat: implement operation limit check in publishPersistedOperations and update related tests --- .../publishPersistedOperations.ts | 20 +++++++++++++- .../core/repositories/OperationsRepository.ts | 12 +-------- .../test/persisted-operations.test.ts | 27 ++++++------------- 3 files changed, 28 insertions(+), 31 deletions(-) diff --git a/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts b/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts index a5794160cd..f4e24ceebd 100644 --- a/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts +++ b/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts @@ -14,7 +14,7 @@ import { buildASTSchema as graphQLBuildASTSchema, DocumentNode, parse, validate import { PublishedOperationData, UpdatedPersistedOperation } from '../../../types/index.js'; import { FederatedGraphRepository } from '../../repositories/FederatedGraphRepository.js'; import { DefaultNamespace } from '../../repositories/NamespaceRepository.js'; -import { OperationsRepository } from 
'../../repositories/OperationsRepository.js'; +import { MAX_MANIFEST_OPERATIONS, OperationsRepository } from '../../repositories/OperationsRepository.js'; import type { RouterOptions } from '../../routes.js'; import { enrichLogger, extractOperationNames, getLogger, handleError } from '../../util.js'; import { UnauthorizedError } from '../../errors/errors.js'; @@ -158,6 +158,24 @@ export function publishPersistedOperations( operationsResult.map((op) => [op.operationId, { hash: op.hash, operationNames: op.operationNames }]), ); + // Check if adding new operations would exceed the manifest limit + const allExistingOperations = await operationsRepo.getAllPersistedOperationsForGraph(); + const existingHashes = new Set(allExistingOperations.map((op) => op.hash)); + const newOperationCount = req.operations.filter((op) => { + const hash = crypto.createHash('sha256').update(op.contents).digest('hex'); + return !existingHashes.has(hash); + }).length; + + if (allExistingOperations.length + newOperationCount > MAX_MANIFEST_OPERATIONS) { + return { + response: { + code: EnumStatusCode.ERR, + details: `Operation limit exceeded: adding ${newOperationCount} new operations would bring the total to ${allExistingOperations.length + newOperationCount}, which exceeds the maximum of ${MAX_MANIFEST_OPERATIONS} operations per graph`, + }, + operations: [], + }; + } + const processOperation = async ( operation: PersistedOperation, ): Promise<{ diff --git a/controlplane/src/core/repositories/OperationsRepository.ts b/controlplane/src/core/repositories/OperationsRepository.ts index 6c13a2ff9f..06a6675cbf 100644 --- a/controlplane/src/core/repositories/OperationsRepository.ts +++ b/controlplane/src/core/repositories/OperationsRepository.ts @@ -558,18 +558,8 @@ export class OperationsRepository { logger.warn({ federatedGraphId: this.federatedGraphId }, 'No persisted operations with content found for manifest generation'); } - const truncated = allOperations.length > MAX_MANIFEST_OPERATIONS; - 
const includedOperations = truncated ? allOperations.slice(0, MAX_MANIFEST_OPERATIONS) : allOperations; - - if (truncated) { - logger.warn( - { federatedGraphId: this.federatedGraphId, organizationId, total: allOperations.length, included: MAX_MANIFEST_OPERATIONS }, - `Manifest truncated: found ${allOperations.length} operations, including only the first ${MAX_MANIFEST_OPERATIONS}`, - ); - } - const operations: Record = {}; - for (const op of includedOperations) { + for (const op of allOperations) { operations[op.hash] = op.operationContent; } diff --git a/controlplane/test/persisted-operations.test.ts b/controlplane/test/persisted-operations.test.ts index 7a4ca3ca49..44be27de31 100644 --- a/controlplane/test/persisted-operations.test.ts +++ b/controlplane/test/persisted-operations.test.ts @@ -728,7 +728,7 @@ describe('Persisted operations', (ctx) => { expect(manifest2.revision).toBe(manifest1.revision); }); - test('Should truncate the manifest to the max operation limit', async (testContext) => { + test('Should reject publish when operation limit would be exceeded', async (testContext) => { const { client, server, blobStorage, users } = await SetupTest({ dbname, chClient, @@ -747,39 +747,28 @@ describe('Persisted operations', (ctx) => { const fedGraph = await fedGraphRepo.byName(fedGraphName, 'default'); expect(fedGraph).toBeDefined(); - // Seed operations directly in the DB to avoid hitting the per-request - // limit of 100 repeatedly. + // Seed operations directly in the DB to fill up to the limit. 
const opsRepo = new OperationsRepository(db, fedGraph!.id); const clientId = await opsRepo.registerClient('test-client', user.userId); - const seedOps = Array.from({ length: MAX_MANIFEST_OPERATIONS + 1 }, (_, i) => ({ + const seedOps = Array.from({ length: MAX_MANIFEST_OPERATIONS }, (_, i) => ({ operationId: `seed-op-${i}`, hash: crypto.createHash('sha256').update(`seed-op-${i}`).digest('hex'), filePath: `seed-op-${i}.graphql`, - contents: `query { hello }`, + contents: `query SeedOp${i} { hello }`, operationNames: [`SeedOp${i}`], })); await opsRepo.updatePersistedOperations(clientId, user.userId, seedOps); - // Publish one operation via the API to trigger manifest generation. + // Publishing a new operation should be rejected because the limit is already reached. const resp = await client.publishPersistedOperations({ fedGraphName, namespace: 'default', clientName: 'test-client', - operations: [{ id: genID('trigger'), contents: `query { hello }` }], + operations: [{ id: genID('trigger'), contents: `query ExceedsLimit { goodbye }` }], }); - expect(resp.response?.code).toBe(EnumStatusCode.OK); - - // The manifest should be truncated to MAX_MANIFEST_OPERATIONS. - const storageKeys = blobStorage.keys(); - const manifestKey = storageKeys.find((key) => key.endsWith('/operations/manifest.json')); - expect(manifestKey).toBeDefined(); - - const blobObject = await blobStorage.getObject({ key: manifestKey! 
}); - const text = await new Response(blobObject.stream).text(); - const manifest = JSON.parse(text); - - expect(Object.keys(manifest.operations).length).toBe(MAX_MANIFEST_OPERATIONS); + expect(resp.response?.code).toBe(EnumStatusCode.ERR); + expect(resp.response?.details).toContain('Operation limit exceeded'); }); }); From 716a67078a6b11cf67e63298b989c93f16ddb23e Mon Sep 17 00:00:00 2001 From: JivusAyrus Date: Tue, 24 Mar 2026 18:49:06 +0530 Subject: [PATCH 15/31] feat: implement persisted query list (PQL) loading from various storage providers --- router/core/router.go | 102 +++++++++++++--- .../persistedoperation/pqlmanifest/fetcher.go | 10 +- .../persistedoperation/pqlmanifest/store.go | 115 ++++++++++++++++++ router/pkg/config/config.go | 1 + 4 files changed, 202 insertions(+), 26 deletions(-) diff --git a/router/core/router.go b/router/core/router.go index 77c158c31c..d30f13450d 100644 --- a/router/core/router.go +++ b/router/core/router.go @@ -1253,29 +1253,37 @@ func (r *Router) buildClients(ctx context.Context) error { var pqlStore *pqlmanifest.Store if r.persistedOperationsConfig.Manifest.Enabled { - if r.graphApiToken == "" { - return errors.New("graph token is required for PQL manifest") - } + pqlStore = pqlmanifest.NewStore(r.logger) - fetcher, err := pqlmanifest.NewFetcher(r.cdnConfig.URL, r.graphApiToken, r.logger) - if err != nil { - return fmt.Errorf("failed to create PQL manifest fetcher: %w", err) - } + manifestPath := r.persistedOperationsConfig.Manifest.Path + if manifestPath != "" { + if err := r.loadPQLManifestFromStorage(ctx, pqlStore, manifestPath, fileSystemProviders, s3Providers, cdnProviders); err != nil { + return err + } + } else { + // No path set — fetch manifest from CDN and poll for updates + if r.graphApiToken == "" { + return errors.New("graph token is required for PQL manifest") + } - pqlStore = pqlmanifest.NewStore(r.logger) + fetcher, err := pqlmanifest.NewFetcher(r.cdnConfig.URL, r.graphApiToken, r.logger) + if err != nil { + 
return fmt.Errorf("failed to create PQL manifest fetcher: %w", err) + } - poller := pqlmanifest.NewPoller( - fetcher, pqlStore, - r.persistedOperationsConfig.Manifest.PollInterval, - r.persistedOperationsConfig.Manifest.PollJitter, - r.logger, - ) + poller := pqlmanifest.NewPoller( + fetcher, pqlStore, + r.persistedOperationsConfig.Manifest.PollInterval, + r.persistedOperationsConfig.Manifest.PollJitter, + r.logger, + ) - if err := poller.FetchInitial(ctx); err != nil { - r.logger.Warn("Failed to fetch initial PQL manifest, will retry on next poll", zap.Error(err)) - } + if err := poller.FetchInitial(ctx); err != nil { + r.logger.Warn("Failed to fetch initial PQL manifest, will retry on next poll", zap.Error(err)) + } - go poller.Poll(ctx) + go poller.Poll(ctx) + } // When manifest is enabled, do not use CDN fetches for individual operations pClient = nil @@ -1313,6 +1321,64 @@ func (r *Router) buildClients(ctx context.Context) error { return nil } +// loadPQLManifestFromStorage loads a PQL manifest from the configured storage provider. 
+func (r *Router) loadPQLManifestFromStorage( + ctx context.Context, + pqlStore *pqlmanifest.Store, + manifestPath string, + fileSystemProviders map[string]config.FileSystemStorageProvider, + s3Providers map[string]config.S3StorageProvider, + cdnProviders map[string]config.CDNStorageProvider, +) error { + storageProviderID := r.persistedOperationsConfig.Storage.ProviderID + objectPrefix := r.persistedOperationsConfig.Storage.ObjectPrefix + + resolveObjectPath := func(path string) string { + if objectPrefix != "" { + return objectPrefix + "/" + path + } + return path + } + + var providerType string + + if provider, ok := fileSystemProviders[storageProviderID]; ok { + providerType = "filesystem" + fullPath := provider.Path + "/" + resolveObjectPath(manifestPath) + if err := pqlStore.LoadFromFile(fullPath); err != nil { + return fmt.Errorf("failed to load PQL manifest from filesystem provider %q at %q: %w", storageProviderID, fullPath, err) + } + } else if provider, ok := s3Providers[storageProviderID]; ok { + providerType = "s3" + if err := pqlStore.LoadFromS3(ctx, provider, resolveObjectPath(manifestPath)); err != nil { + return fmt.Errorf("failed to load PQL manifest from S3 provider %q: %w", storageProviderID, err) + } + } else if provider, ok := cdnProviders[storageProviderID]; ok { + providerType = "cdn" + if r.graphApiToken == "" { + return errors.New("graph token is required to fetch PQL manifest from CDN") + } + if err := pqlStore.LoadFromCDN(ctx, provider.URL, r.graphApiToken, manifestPath); err != nil { + return fmt.Errorf("failed to load PQL manifest from CDN provider %q: %w", storageProviderID, err) + } + } else if storageProviderID == "" { + providerType = "file" + if err := pqlStore.LoadFromFile(manifestPath); err != nil { + return fmt.Errorf("failed to load PQL manifest from file %q: %w", manifestPath, err) + } + } else { + return fmt.Errorf("unknown storage provider id %q for PQL manifest", storageProviderID) + } + + r.logger.Info("Loaded PQL 
manifest", + zap.String("source", providerType), + zap.String("provider_id", storageProviderID), + zap.Int("operations", pqlStore.OperationCount()), + ) + + return nil +} + // Start starts the router. It does block until the router has been initialized. After that the server is listening // on a separate goroutine. The server can be shutdown with Router.Shutdown(). Not safe for concurrent use. // During initialization, the router will register itself with the control plane and poll the config from the CDN diff --git a/router/internal/persistedoperation/pqlmanifest/fetcher.go b/router/internal/persistedoperation/pqlmanifest/fetcher.go index 2710f105e2..af745665cc 100644 --- a/router/internal/persistedoperation/pqlmanifest/fetcher.go +++ b/router/internal/persistedoperation/pqlmanifest/fetcher.go @@ -140,14 +140,8 @@ func (f *Fetcher) Fetch(ctx context.Context, currentRevision string) (*Manifest, return nil, false, fmt.Errorf("could not unmarshal PQL manifest: %w", err) } - if manifest.Version < 1 { - return nil, false, fmt.Errorf("unsupported PQL manifest version %d", manifest.Version) - } - if manifest.Revision == "" { - return nil, false, errors.New("PQL manifest has empty revision") - } - if manifest.Operations == nil { - return nil, false, errors.New("PQL manifest has no operations field") + if err := validateManifest(&manifest); err != nil { + return nil, false, fmt.Errorf("invalid PQL manifest: %w", err) } return &manifest, true, nil diff --git a/router/internal/persistedoperation/pqlmanifest/store.go b/router/internal/persistedoperation/pqlmanifest/store.go index b04985a8a5..2c8cfb1d08 100644 --- a/router/internal/persistedoperation/pqlmanifest/store.go +++ b/router/internal/persistedoperation/pqlmanifest/store.go @@ -1,8 +1,17 @@ package pqlmanifest import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "os" "sync/atomic" + "github.com/minio/minio-go/v7" + "github.com/minio/minio-go/v7/pkg/credentials" + 
"github.com/wundergraph/cosmo/router/pkg/config" "go.uber.org/zap" ) @@ -44,6 +53,112 @@ func (s *Store) LookupByHash(sha256Hash string) (body []byte, found bool) { return []byte(op), true } +// LoadFromFile reads a manifest JSON file from disk and loads it into the store. +func (s *Store) LoadFromFile(path string) error { + data, err := os.ReadFile(path) + if err != nil { + return fmt.Errorf("failed to read manifest file: %w", err) + } + + return s.loadFromData(data) +} + +// LoadFromS3 fetches a manifest from an S3 bucket and loads it into the store. +func (s *Store) LoadFromS3(ctx context.Context, provider config.S3StorageProvider, objectPath string) error { + providers := []credentials.Provider{ + &credentials.Static{ + Value: credentials.Value{ + AccessKeyID: provider.AccessKey, + SecretAccessKey: provider.SecretKey, + SignerType: credentials.SignatureV4, + }, + }, + &credentials.IAM{ + Client: &http.Client{ + Transport: http.DefaultTransport, + }, + }, + } + + minioClient, err := minio.New(provider.Endpoint, &minio.Options{ + Creds: credentials.NewChainCredentials(providers), + Region: provider.Region, + Secure: provider.Secure, + }) + if err != nil { + return fmt.Errorf("failed to create S3 client: %w", err) + } + + reader, err := minioClient.GetObject(ctx, provider.Bucket, objectPath, minio.GetObjectOptions{}) + if err != nil { + return fmt.Errorf("failed to get object from S3: %w", err) + } + defer reader.Close() + + data, err := io.ReadAll(reader) + if err != nil { + return fmt.Errorf("failed to read S3 object: %w", err) + } + + return s.loadFromData(data) +} + +// LoadFromCDN fetches a manifest from a CDN endpoint and loads it into the store. 
+func (s *Store) LoadFromCDN(ctx context.Context, cdnURL, token, manifestPath string) error { + reqURL := cdnURL + "/" + manifestPath + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) + if err != nil { + return fmt.Errorf("failed to create CDN request: %w", err) + } + req.Header.Set("Authorization", "Bearer "+token) + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return fmt.Errorf("failed to fetch manifest from CDN: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("CDN returned status %d", resp.StatusCode) + } + + data, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("failed to read CDN response: %w", err) + } + + return s.loadFromData(data) +} + +// loadFromData parses and validates manifest JSON data and loads it into the store. +func (s *Store) loadFromData(data []byte) error { + var manifest Manifest + if err := json.Unmarshal(data, &manifest); err != nil { + return fmt.Errorf("failed to parse manifest: %w", err) + } + + if err := validateManifest(&manifest); err != nil { + return fmt.Errorf("invalid manifest: %w", err) + } + + s.Load(&manifest) + return nil +} + +func validateManifest(m *Manifest) error { + if m.Version != 1 { + return fmt.Errorf("unsupported manifest version %d, expected 1", m.Version) + } + if m.Revision == "" { + return fmt.Errorf("manifest revision is required") + } + if m.Operations == nil { + return fmt.Errorf("manifest operations field is required") + } + return nil +} + // IsLoaded returns whether a manifest has been loaded. 
func (s *Store) IsLoaded() bool { return s.manifest.Load() != nil diff --git a/router/pkg/config/config.go b/router/pkg/config/config.go index cda12ce30b..130d4afe66 100644 --- a/router/pkg/config/config.go +++ b/router/pkg/config/config.go @@ -929,6 +929,7 @@ type AutomaticPersistedQueriesCacheConfig struct { type PQLManifestConfig struct { Enabled bool `yaml:"enabled" envDefault:"false" env:"ENABLED"` + Path string `yaml:"path" env:"PATH"` PollInterval time.Duration `yaml:"poll_interval" envDefault:"10s" env:"POLL_INTERVAL"` PollJitter time.Duration `yaml:"poll_jitter" envDefault:"5s" env:"POLL_JITTER"` } From 8c6200032b8e330f59d35b51b4b294f500a7ed5b Mon Sep 17 00:00:00 2001 From: JivusAyrus Date: Wed, 25 Mar 2026 15:10:40 +0530 Subject: [PATCH 16/31] fix: correct operation ID usage in operations mapping and improve CDN URL construction --- .../src/core/repositories/OperationsRepository.ts | 2 +- router/core/router.go | 12 +++++++----- .../internal/persistedoperation/pqlmanifest/store.go | 6 +++++- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/controlplane/src/core/repositories/OperationsRepository.ts b/controlplane/src/core/repositories/OperationsRepository.ts index 06a6675cbf..42d43865a8 100644 --- a/controlplane/src/core/repositories/OperationsRepository.ts +++ b/controlplane/src/core/repositories/OperationsRepository.ts @@ -560,7 +560,7 @@ export class OperationsRepository { const operations: Record = {}; for (const op of allOperations) { - operations[op.hash] = op.operationContent; + operations[op.operationId] = op.operationContent; } // Compute revision as SHA256 of the deterministic JSON serialization (sorted keys) diff --git a/router/core/router.go b/router/core/router.go index d30f13450d..4c03a8fbf6 100644 --- a/router/core/router.go +++ b/router/core/router.go @@ -10,6 +10,8 @@ import ( "net/http" "net/url" "os" + "path" + "path/filepath" "sync" "time" @@ -1333,18 +1335,18 @@ func (r *Router) loadPQLManifestFromStorage( 
storageProviderID := r.persistedOperationsConfig.Storage.ProviderID objectPrefix := r.persistedOperationsConfig.Storage.ObjectPrefix - resolveObjectPath := func(path string) string { + resolveObjectPath := func(p string) string { if objectPrefix != "" { - return objectPrefix + "/" + path + return path.Join(objectPrefix, p) } - return path + return p } var providerType string if provider, ok := fileSystemProviders[storageProviderID]; ok { providerType = "filesystem" - fullPath := provider.Path + "/" + resolveObjectPath(manifestPath) + fullPath := filepath.Join(provider.Path, resolveObjectPath(manifestPath)) if err := pqlStore.LoadFromFile(fullPath); err != nil { return fmt.Errorf("failed to load PQL manifest from filesystem provider %q at %q: %w", storageProviderID, fullPath, err) } @@ -1358,7 +1360,7 @@ func (r *Router) loadPQLManifestFromStorage( if r.graphApiToken == "" { return errors.New("graph token is required to fetch PQL manifest from CDN") } - if err := pqlStore.LoadFromCDN(ctx, provider.URL, r.graphApiToken, manifestPath); err != nil { + if err := pqlStore.LoadFromCDN(ctx, provider.URL, r.graphApiToken, resolveObjectPath(manifestPath)); err != nil { return fmt.Errorf("failed to load PQL manifest from CDN provider %q: %w", storageProviderID, err) } } else if storageProviderID == "" { diff --git a/router/internal/persistedoperation/pqlmanifest/store.go b/router/internal/persistedoperation/pqlmanifest/store.go index 2c8cfb1d08..998c314b5f 100644 --- a/router/internal/persistedoperation/pqlmanifest/store.go +++ b/router/internal/persistedoperation/pqlmanifest/store.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "net/http" + "net/url" "os" "sync/atomic" @@ -105,7 +106,10 @@ func (s *Store) LoadFromS3(ctx context.Context, provider config.S3StorageProvide // LoadFromCDN fetches a manifest from a CDN endpoint and loads it into the store. 
func (s *Store) LoadFromCDN(ctx context.Context, cdnURL, token, manifestPath string) error { - reqURL := cdnURL + "/" + manifestPath + reqURL, err := url.JoinPath(cdnURL, manifestPath) + if err != nil { + return fmt.Errorf("failed to construct CDN URL: %w", err) + } req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) if err != nil { From 9d8e98ef9dc73752b109896a57483c9ac9ed26b0 Mon Sep 17 00:00:00 2001 From: JivusAyrus Date: Wed, 25 Mar 2026 15:41:00 +0530 Subject: [PATCH 17/31] feat: implement FetchManifest methods for CDN, S3, and filesystem providers; refactor manifest loading logic --- router/core/router.go | 46 ++++++++-- .../operationstorage/cdn/client.go | 41 +++++++++ .../operationstorage/fs/client.go | 6 ++ .../operationstorage/s3/client.go | 13 +++ .../persistedoperation/pqlmanifest/store.go | 84 +------------------ 5 files changed, 103 insertions(+), 87 deletions(-) diff --git a/router/core/router.go b/router/core/router.go index 4c03a8fbf6..5cad90a1cc 100644 --- a/router/core/router.go +++ b/router/core/router.go @@ -11,7 +11,6 @@ import ( "net/url" "os" "path" - "path/filepath" "sync" "time" @@ -1346,23 +1345,58 @@ func (r *Router) loadPQLManifestFromStorage( if provider, ok := fileSystemProviders[storageProviderID]; ok { providerType = "filesystem" - fullPath := filepath.Join(provider.Path, resolveObjectPath(manifestPath)) - if err := pqlStore.LoadFromFile(fullPath); err != nil { - return fmt.Errorf("failed to load PQL manifest from filesystem provider %q at %q: %w", storageProviderID, fullPath, err) + fsClient, err := fs.NewClient(provider.Path, &fs.Options{ + ObjectPathPrefix: r.persistedOperationsConfig.Storage.ObjectPrefix, + }) + if err != nil { + return fmt.Errorf("failed to create filesystem client for PQL manifest: %w", err) + } + data, err := fsClient.FetchManifest(manifestPath) + if err != nil { + return fmt.Errorf("failed to load PQL manifest from filesystem provider %q: %w", storageProviderID, err) + } + if err := 
pqlStore.LoadFromData(data); err != nil { + return fmt.Errorf("failed to parse PQL manifest from filesystem provider %q: %w", storageProviderID, err) } } else if provider, ok := s3Providers[storageProviderID]; ok { providerType = "s3" - if err := pqlStore.LoadFromS3(ctx, provider, resolveObjectPath(manifestPath)); err != nil { + s3Client, err := s3.NewClient(provider.Endpoint, &s3.Options{ + AccessKeyID: provider.AccessKey, + SecretAccessKey: provider.SecretKey, + Region: provider.Region, + UseSSL: provider.Secure, + BucketName: provider.Bucket, + ObjectPathPrefix: r.persistedOperationsConfig.Storage.ObjectPrefix, + TraceProvider: r.tracerProvider, + }) + if err != nil { + return fmt.Errorf("failed to create S3 client for PQL manifest: %w", err) + } + data, err := s3Client.FetchManifest(ctx, resolveObjectPath(manifestPath)) + if err != nil { return fmt.Errorf("failed to load PQL manifest from S3 provider %q: %w", storageProviderID, err) } + if err := pqlStore.LoadFromData(data); err != nil { + return fmt.Errorf("failed to parse PQL manifest from S3 provider %q: %w", storageProviderID, err) + } } else if provider, ok := cdnProviders[storageProviderID]; ok { providerType = "cdn" if r.graphApiToken == "" { return errors.New("graph token is required to fetch PQL manifest from CDN") } - if err := pqlStore.LoadFromCDN(ctx, provider.URL, r.graphApiToken, resolveObjectPath(manifestPath)); err != nil { + cdnClient, err := cdn.NewClient(provider.URL, r.graphApiToken, cdn.Options{ + Logger: r.logger, + }) + if err != nil { + return fmt.Errorf("failed to create CDN client for PQL manifest: %w", err) + } + data, err := cdnClient.FetchManifest(ctx, resolveObjectPath(manifestPath)) + if err != nil { return fmt.Errorf("failed to load PQL manifest from CDN provider %q: %w", storageProviderID, err) } + if err := pqlStore.LoadFromData(data); err != nil { + return fmt.Errorf("failed to parse PQL manifest from CDN provider %q: %w", storageProviderID, err) + } } else if 
storageProviderID == "" { providerType = "file" if err := pqlStore.LoadFromFile(manifestPath); err != nil { diff --git a/router/internal/persistedoperation/operationstorage/cdn/client.go b/router/internal/persistedoperation/operationstorage/cdn/client.go index f747931828..cdc0a548a6 100644 --- a/router/internal/persistedoperation/operationstorage/cdn/client.go +++ b/router/internal/persistedoperation/operationstorage/cdn/client.go @@ -164,4 +164,45 @@ func NewClient(endpoint string, token string, opts Options) (*client, error) { }, nil } +// FetchManifest fetches a PQL manifest from the CDN at the given path and returns the raw bytes. +func (cdn *client) FetchManifest(ctx context.Context, manifestPath string) ([]byte, error) { + manifestURL := cdn.cdnURL.ResolveReference(&url.URL{Path: manifestPath}) + + req, err := http.NewRequestWithContext(ctx, "GET", manifestURL.String(), nil) + if err != nil { + return nil, err + } + + req.Header.Set("Content-Type", "application/json; charset=UTF-8") + req.Header.Add("Authorization", "Bearer "+cdn.authenticationToken) + req.Header.Set("Accept-Encoding", "gzip") + + resp, err := cdn.httpClient.Do(req) + if err != nil { + return nil, err + } + defer func() { + _ = resp.Body.Close() + }() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("CDN returned status %d when fetching manifest", resp.StatusCode) + } + + var reader io.Reader = resp.Body + + if resp.Header.Get("Content-Encoding") == "gzip" { + r, err := gzip.NewReader(resp.Body) + if err != nil { + return nil, fmt.Errorf("could not create gzip reader: %w", err) + } + defer func() { + _ = r.Close() + }() + reader = r + } + + return io.ReadAll(reader) +} + func (cdn *client) Close() {} diff --git a/router/internal/persistedoperation/operationstorage/fs/client.go b/router/internal/persistedoperation/operationstorage/fs/client.go index 6e7181d721..d0884b8b44 100644 --- a/router/internal/persistedoperation/operationstorage/fs/client.go +++ 
b/router/internal/persistedoperation/operationstorage/fs/client.go @@ -69,4 +69,10 @@ func (c client) persistedOperation(clientName string, sha256Hash string) ([]byte return []byte(po.Body), nil } +// FetchManifest reads a PQL manifest from the filesystem at the given path and returns the raw bytes. +func (c client) FetchManifest(manifestPath string) ([]byte, error) { + fullPath := filepath.Join(c.path, c.options.ObjectPathPrefix, manifestPath) + return os.ReadFile(fullPath) +} + func (c client) Close() {} diff --git a/router/internal/persistedoperation/operationstorage/s3/client.go b/router/internal/persistedoperation/operationstorage/s3/client.go index 55e8bbfa6b..c67e408566 100644 --- a/router/internal/persistedoperation/operationstorage/s3/client.go +++ b/router/internal/persistedoperation/operationstorage/s3/client.go @@ -108,4 +108,17 @@ func (c Client) persistedOperation(ctx context.Context, clientName, sha256Hash s return []byte(po.Body), nil } +// FetchManifest fetches a PQL manifest from S3 at the given object path and returns the raw bytes. 
+func (c Client) FetchManifest(ctx context.Context, objectPath string) ([]byte, error) { + reader, err := c.client.GetObject(ctx, c.options.BucketName, objectPath, minio.GetObjectOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get manifest from S3: %w", err) + } + defer func() { + _ = reader.Close() + }() + + return io.ReadAll(reader) +} + func (c Client) Close() {} diff --git a/router/internal/persistedoperation/pqlmanifest/store.go b/router/internal/persistedoperation/pqlmanifest/store.go index 998c314b5f..81912754ab 100644 --- a/router/internal/persistedoperation/pqlmanifest/store.go +++ b/router/internal/persistedoperation/pqlmanifest/store.go @@ -1,18 +1,11 @@ package pqlmanifest import ( - "context" "encoding/json" "fmt" - "io" - "net/http" - "net/url" "os" "sync/atomic" - "github.com/minio/minio-go/v7" - "github.com/minio/minio-go/v7/pkg/credentials" - "github.com/wundergraph/cosmo/router/pkg/config" "go.uber.org/zap" ) @@ -61,82 +54,11 @@ func (s *Store) LoadFromFile(path string) error { return fmt.Errorf("failed to read manifest file: %w", err) } - return s.loadFromData(data) + return s.LoadFromData(data) } -// LoadFromS3 fetches a manifest from an S3 bucket and loads it into the store. 
-func (s *Store) LoadFromS3(ctx context.Context, provider config.S3StorageProvider, objectPath string) error { - providers := []credentials.Provider{ - &credentials.Static{ - Value: credentials.Value{ - AccessKeyID: provider.AccessKey, - SecretAccessKey: provider.SecretKey, - SignerType: credentials.SignatureV4, - }, - }, - &credentials.IAM{ - Client: &http.Client{ - Transport: http.DefaultTransport, - }, - }, - } - - minioClient, err := minio.New(provider.Endpoint, &minio.Options{ - Creds: credentials.NewChainCredentials(providers), - Region: provider.Region, - Secure: provider.Secure, - }) - if err != nil { - return fmt.Errorf("failed to create S3 client: %w", err) - } - - reader, err := minioClient.GetObject(ctx, provider.Bucket, objectPath, minio.GetObjectOptions{}) - if err != nil { - return fmt.Errorf("failed to get object from S3: %w", err) - } - defer reader.Close() - - data, err := io.ReadAll(reader) - if err != nil { - return fmt.Errorf("failed to read S3 object: %w", err) - } - - return s.loadFromData(data) -} - -// LoadFromCDN fetches a manifest from a CDN endpoint and loads it into the store. 
-func (s *Store) LoadFromCDN(ctx context.Context, cdnURL, token, manifestPath string) error { - reqURL, err := url.JoinPath(cdnURL, manifestPath) - if err != nil { - return fmt.Errorf("failed to construct CDN URL: %w", err) - } - - req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) - if err != nil { - return fmt.Errorf("failed to create CDN request: %w", err) - } - req.Header.Set("Authorization", "Bearer "+token) - - resp, err := http.DefaultClient.Do(req) - if err != nil { - return fmt.Errorf("failed to fetch manifest from CDN: %w", err) - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return fmt.Errorf("CDN returned status %d", resp.StatusCode) - } - - data, err := io.ReadAll(resp.Body) - if err != nil { - return fmt.Errorf("failed to read CDN response: %w", err) - } - - return s.loadFromData(data) -} - -// loadFromData parses and validates manifest JSON data and loads it into the store. -func (s *Store) loadFromData(data []byte) error { +// LoadFromData parses and validates manifest JSON data and loads it into the store. +func (s *Store) LoadFromData(data []byte) error { var manifest Manifest if err := json.Unmarshal(data, &manifest); err != nil { return fmt.Errorf("failed to parse manifest: %w", err) From 876b24b13afe7c68b2cdf7b79c3effcd424dff61 Mon Sep 17 00:00:00 2001 From: JivusAyrus Date: Wed, 25 Mar 2026 15:41:20 +0530 Subject: [PATCH 18/31] feat: add path property for PQL manifest JSON file in config schema --- router/pkg/config/config.schema.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index 3640470904..9c0fc4b790 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -203,6 +203,10 @@ "description": "Enable the PQL manifest feature.", "default": false }, + "path": { + "type": "string", + "description": "The path to the PQL manifest JSON file." 
+ }, "poll_interval": { "type": "string", "format": "go-duration", From 6fad8635c6222781a60e89891e1a8eef0bc1a8ca Mon Sep 17 00:00:00 2001 From: JivusAyrus Date: Wed, 25 Mar 2026 15:55:11 +0530 Subject: [PATCH 19/31] docs: update PQL Manifest section in README to clarify loading behavior and storage options --- router/internal/persistedoperation/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/router/internal/persistedoperation/README.md b/router/internal/persistedoperation/README.md index 128136abcd..d3ca60894d 100644 --- a/router/internal/persistedoperation/README.md +++ b/router/internal/persistedoperation/README.md @@ -8,7 +8,7 @@ Persisted operations are stored queries, which can be executed just by providing Specifically for those purposes, we enable three different methods of storing persisted operations: 1. **Persisted Operation Files** - This operation, documented [here](https://cosmo-docs.wundergraph.com/router/persisted-operations), allows users to store persisted operations in files in a CDN/S3 bucket, which are then loaded by the router individually per request. This is both useful for storing large queries, as well as by reducing the router's attack surface by only allowing registered operations. -2. **PQL Manifest** - When enabled, the router downloads a single JSON manifest containing all persisted operations at startup and polls for updates periodically. Operations are resolved entirely in-memory with zero per-request network overhead. When the manifest is enabled, it is authoritative — no CDN fallback occurs for individual operations. See the `pqlmanifest` subpackage. +2. **PQL Manifest** - When enabled, the router loads a single JSON manifest containing all persisted operations at startup. The manifest can be loaded from any configured storage provider (S3, CDN, or filesystem) via the `storage_providers` config, or from the Cosmo CDN by default. 
When no explicit `path` is set, the router fetches from the Cosmo CDN and polls for updates periodically. When a `path` is set with a storage provider, the manifest is loaded once at startup. Operations are resolved entirely in-memory with zero per-request network overhead. When the manifest is enabled, it is authoritative — no CDN fallback occurs for individual operations. See the `pqlmanifest` subpackage. 3. **Automatic Persisted Queries** - This setting allows users to automatically cache queries that are sent, as long as they are sent together with their sha256hash. This is a useful performance optimizer, as it allows the router to cache queries that are frequently requested, without the need to manually store them in a file. These methods can exist in concert — for example, users can enable the PQL manifest for zero-latency lookups and use APQ to cache ad-hoc queries. @@ -27,7 +27,7 @@ When a persisted operation request arrives, the router resolves it in this order > **Hash validation prerequisite:** When a request includes both a query body and `extensions.persistedQuery.sha256Hash`, the router validates the body against the hash and rejects the request if they do not match — _before_ any APQ or persisted-operation lookup occurs. See `router/core/graphql_prehandler.go` (`handleOperation`). 1. **Persisted Operations (CDN), no APQ** → The router fetches individual operations from CDN/S3 on demand. If a query is not found, the router returns an error. After the query is planned, the router caches the normalized query in the local persisted operation cache. -1. **PQL Manifest, no APQ** → The router downloads the manifest at startup and polls for updates. All lookups are in-memory. Unknown hashes are rejected immediately without any network call. +1. **PQL Manifest, no APQ** → The router loads the manifest at startup from the configured storage provider (S3, CDN, filesystem) or the Cosmo CDN. When using the Cosmo CDN, it also polls for updates. 
When using a custom storage provider with an explicit path, the manifest is loaded once at startup. All lookups are in-memory. Unknown hashes are rejected immediately without any network call. 1. **APQ, No Persisted Operations** → If a `persisted_operation` request is sent, the router checks the APQ cache first. If not found, it checks if a query body was sent with the request. If so, it validates the hash against the body, then executes and caches it. Otherwise, the router returns an error. 1. **No APQ, No Persisted Operations** → If a persisted operation is sent, the router returns an error, as there are no persisted operations stored. Even if a query is sent, the router will still error because APQ isn't enabled. 1. **APQ and Persisted Operations** → The router validates any included query body against the hash, then checks APQ first, then the PQL manifest or CDN (depending on config), then checks if a query body was attached. First match wins. From 673808336def1514efc48ec241d7074d6f67d206 Mon Sep 17 00:00:00 2001 From: JivusAyrus Date: Wed, 25 Mar 2026 16:00:28 +0530 Subject: [PATCH 20/31] fix: lint --- controlplane/src/core/repositories/OperationsRepository.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/controlplane/src/core/repositories/OperationsRepository.ts b/controlplane/src/core/repositories/OperationsRepository.ts index 42d43865a8..55f1ccdc56 100644 --- a/controlplane/src/core/repositories/OperationsRepository.ts +++ b/controlplane/src/core/repositories/OperationsRepository.ts @@ -555,7 +555,10 @@ export class OperationsRepository { const allOperations = await this.getAllPersistedOperationsForGraph(); if (allOperations.length === 0) { - logger.warn({ federatedGraphId: this.federatedGraphId }, 'No persisted operations with content found for manifest generation'); + logger.warn( + { federatedGraphId: this.federatedGraphId }, + 'No persisted operations with content found for manifest generation', + ); } const operations: Record = 
{}; From 5c55b9322a81dcd0aad516f1b1b509491fe48889 Mon Sep 17 00:00:00 2001 From: JivusAyrus Date: Thu, 26 Mar 2026 14:25:21 +0530 Subject: [PATCH 21/31] fix: tests --- controlplane/test/persisted-operations.test.ts | 2 +- controlplane/test/test-util.ts | 2 +- router/pkg/config/testdata/config_defaults.json | 1 + router/pkg/config/testdata/config_full.json | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/controlplane/test/persisted-operations.test.ts b/controlplane/test/persisted-operations.test.ts index 5b8befcb67..ee2b0c700d 100644 --- a/controlplane/test/persisted-operations.test.ts +++ b/controlplane/test/persisted-operations.test.ts @@ -764,7 +764,7 @@ describe('Persisted operations', (ctx) => { fedGraphName, namespace: 'default', clientName: 'test-client', - operations: [{ id: genID('trigger'), contents: `query ExceedsLimit { goodbye }` }], + operations: [{ id: genID('trigger'), contents: `query ExceedsLimit { hello }` }], }); expect(resp.response?.code).toBe(EnumStatusCode.ERR); expect(resp.response?.details).toContain('Operation limit exceeded'); diff --git a/controlplane/test/test-util.ts b/controlplane/test/test-util.ts index ea26e6d035..881ffc888f 100644 --- a/controlplane/test/test-util.ts +++ b/controlplane/test/test-util.ts @@ -116,7 +116,7 @@ export const SetupTest = async function ({ const realm = 'test'; const loginRealm = 'master'; - const apiUrl = 'http://localhost:8080'; + const apiUrl = process.env.KC_API_URL || 'http://localhost:8080'; const clientId = 'studio'; const adminUser = 'admin'; const adminPassword = 'changeme'; diff --git a/router/pkg/config/testdata/config_defaults.json b/router/pkg/config/testdata/config_defaults.json index c970e1ec9b..70c409a33a 100644 --- a/router/pkg/config/testdata/config_defaults.json +++ b/router/pkg/config/testdata/config_defaults.json @@ -545,6 +545,7 @@ }, "Manifest": { "Enabled": false, + "Path": "", "PollInterval": 10000000000, "PollJitter": 5000000000 } diff --git 
a/router/pkg/config/testdata/config_full.json b/router/pkg/config/testdata/config_full.json index 31380c18fe..ee05ac8750 100644 --- a/router/pkg/config/testdata/config_full.json +++ b/router/pkg/config/testdata/config_full.json @@ -980,6 +980,7 @@ }, "Manifest": { "Enabled": true, + "Path": "", "PollInterval": 30000000000, "PollJitter": 10000000000 } From 6a13a13d934614e4374989108fb5e754fa5f0ac4 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Thu, 26 Mar 2026 15:23:46 +0100 Subject: [PATCH 22/31] feat(persisted-query): enhance CDN client and add PQL manifest support --- docs-website/router/configuration.mdx | 64 +++++--- .../persisted-operations.mdx | 93 ++++++++++-- .../router/security/hardening-guide.mdx | 5 +- docs-website/router/storage-providers.mdx | 116 ++------------ .../{ => operations}/pql_manifest_test.go | 122 +++++++++++++++ router/core/graphql_prehandler.go | 13 +- router/core/router.go | 143 +++++------------- router/internal/persistedoperation/README.md | 6 +- router/internal/persistedoperation/client.go | 1 + .../operationstorage/cdn/client.go | 136 +++++++++-------- .../operationstorage/fs/client.go | 2 +- router/pkg/config/config.go | 1 - router/pkg/config/config.schema.json | 6 +- router/pkg/config/config_test.go | 7 +- .../pkg/config/testdata/config_defaults.json | 1 - router/pkg/config/testdata/config_full.json | 1 - 16 files changed, 387 insertions(+), 330 deletions(-) rename router-tests/{ => operations}/pql_manifest_test.go (78%) diff --git a/docs-website/router/configuration.mdx b/docs-website/router/configuration.mdx index b0df8644f7..ecc9b6b7bc 100644 --- a/docs-website/router/configuration.mdx +++ b/docs-website/router/configuration.mdx @@ -1167,14 +1167,17 @@ These rules apply to requests being made from the Router to all Subgraphs. 
| Environment Variable | YAML | Required | Description | Default Value | | ------------------------------------------ | ------------------------------------------ | --------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------- | | | persisted_operations | | The configuration for the persisted operations. | | -| PERSISTED_OPERATIONS_DISABLED | persisted_operations.disabled | | Disable persisted operations. | false | +| PERSISTED_OPERATIONS_DISABLED | persisted_operations.disabled | | Disable persisted operations. When set to true, the PQL manifest is also disabled. | false | | | persisted_operations.cache | | LRU cache for persisted operations. | | | PERSISTED_OPERATIONS_CACHE_SIZE | persisted_operations.cache.size | | The size of the cache in SI unit. | "100MB" | | | persisted_operations.storage | | The storage provider for persisted operation. Only one provider can be active. When no provider is specified, the router will fallback to the Cosmo CDN provider to download the persisted operations. | | | PERSISTED_OPERATIONS_STORAGE_PROVIDER_ID | persisted_operations.storage.provider_id | | The ID of the storage provider. The ID must match the ID of the storage provider in the `storage_providers` section. | | -| PERSISTED_OPERATIONS_STORAGE_OBJECT_PREFIX | persisted_operations.storage.object_prefix | | The prefix of the object in the storage provider location. The prefix is put in front of the operation SHA256 hash. \$prefix/SHA256.json | | +| PERSISTED_OPERATIONS_STORAGE_OBJECT_PREFIX | persisted_operations.storage.object_prefix | | The prefix of the object in the storage provider location. The prefix is put in front of the operation SHA256 hash. 
\$prefix/SHA256.json | | | PERSISTED_OPERATIONS_LOG_UNKNOWN | persisted_operations.log_unknown | | Log operations (sent with the operation body) which haven't yet been persisted. If the value is true, all operations not yet persisted are logged to the router logs. | false | | PERSISTED_OPERATIONS_SAFELIST_ENABLED | persisted_operations.safelist.enabled | | Only allows persisted operations (sent with operation body). If the value is true, all operations not explicitly added to the safelist are blocked. | false | +| PERSISTED_OPERATIONS_MANIFEST_ENABLED | persisted_operations.manifest.enabled | | Enable the PQL manifest. When enabled, the router loads all persisted operations from a single `manifest.json` file and serves them from memory. Uses the `storage` config above when a provider is set, otherwise fetches from the Cosmo CDN. Only S3 and CDN storage providers are supported. | false | +| PERSISTED_OPERATIONS_MANIFEST_POLL_INTERVAL| persisted_operations.manifest.poll_interval| | The interval at which the router polls the Cosmo CDN for manifest updates (only when no storage provider is configured). Minimum 10s. | 10s | +| PERSISTED_OPERATIONS_MANIFEST_POLL_JITTER | persisted_operations.manifest.poll_jitter | | Random jitter added to each poll interval to avoid thundering herd. Minimum 1s. 
| 5s | ## Automatic Persisted Queries @@ -1218,27 +1221,54 @@ The configuration for the execution setup contains instructions for the router t ### Example YAML config: -```yaml config.yaml -version: "1" +You can load the execution config from a [storage provider](/router/storage-providers) or from a local file: -execution_config: - storage: - provider_id: s3 - object_path: /prod -``` + + ```yaml Storage provider + version: "1" + execution_config: + storage: + provider_id: s3 + object_path: "router.json" + ``` -or + ```yaml Local file + version: "1" + execution_config: + file: + path: "./__schemas/config.json" + watch: true + watch_interval: "1s" + ``` + + ```yaml Fallback storage + version: "1" + execution_config: + storage: + provider_id: s3 + object_path: "router.json" + fallback_storage: + enabled: true + provider_id: minio + object_path: "router.json" + ``` + -```yaml config.yaml -version: "1" +When using a storage provider, the `object_path` field points to the file in your bucket that is updated after each schema deployment in your CI/CD pipeline: -execution_config: - file: - path: "./__schemas/config.json" - watch: true - watch_interval: "1s" +```bash +# Publish your subgraph +wgc subgraph publish my-subgraph --schema ./schema.graphqls +# Download the latest execution config after successful composition +wgc router fetch mygraph -o router.json +# Upload the execution config to your S3 storage +aws s3 cp router.json s3://cosmo/ ``` +The router will check for updates every 10 seconds (default) and hot-reload accordingly without impacting current user traffic. + +You can configure a fallback storage for fetching the execution config in the event the router cannot reach the primary storage. You cannot use the same provider for both primary and fallback storage. 
+ ### Execution config options | Environment Variable | YAML | Required | Description | Default Value | diff --git a/docs-website/router/persisted-queries/persisted-operations.mdx b/docs-website/router/persisted-queries/persisted-operations.mdx index 434e0641eb..18c27d449d 100644 --- a/docs-website/router/persisted-queries/persisted-operations.mdx +++ b/docs-website/router/persisted-queries/persisted-operations.mdx @@ -25,29 +25,76 @@ The control plane replicates these operations in the Cosmo CDN, where the router Persisted operations require some tooling on the client side. Consult the documentation for your GraphQL client library to find out how to generate a query manifest or query map. -Once this list of operations has been generated, typically in your CI or CD pipeline, you can use [`wgc`](/cli/intro) to register your operations: +### Supported manifest formats + +`wgc operations push` automatically detects the format of your manifest file. The following formats are supported: + + + + The Apollo persisted query manifest format: + + ```json manifest.json + { + "format": "apollo-persisted-query-manifest", + "version": 1, + "operations": [ + { + "id": "dc67510fb4289672bea757e862d6b00e83db5d3c", + "name": "GetEmployees", + "type": "query", + "body": "query GetEmployees { employees { id } }" + } + ] + } + ``` + + + Relay query maps are supported in two formats — as an array of `[id, query]` pairs or as a `{id: query}` object: + + ```json relay-query-map.json + { + "dc67510fb4289672bea757e862d6b00e83db5d3c": "query GetEmployees { employees { id } }", + "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2": "mutation UpdateEmployee($id: ID!) { updateEmployee(id: $id) { id } }" + } + ``` + + ```bash + wgc operations push mygraph -n default -c web -f relay-query-map.json + ``` + + + Plain `.graphql` or `.gql` files containing a single operation. 
The SHA-256 hash is computed automatically: + + ```graphql operations.graphql + query GetEmployees { + employees { + id + } + } + ``` + + ```bash + wgc operations push mygraph -n default -c web -f operations.graphql + ``` + + + +### Pushing operations + +Once your manifest is generated, push it using `wgc`: ```bash wgc operations push mygraph -n default -c web -f my-operations-manifest.json ``` -This will register the operations for your federated graph named `mygraph` in the `default` namespace (as seen in the Studio) and your client named `web` (indicated by the `graphql-client-name` HTTP header), using the same operation identifiers as your library when possible. If your library doesn't generate these identifiers, Cosmo will automatically generate them. +This registers the operations for your federated graph named `mygraph` in the `default` namespace and your client named `web` (indicated by the `graphql-client-name` HTTP header). You can push multiple files at once using the `-f` flag multiple times. -When pushing the operations, you will see a short summary of the operations that were pushed, indicating how many were created and how many were already registered. Alternatively, the `--output` flag can be used to obtain a JSON summary that can easily be processed by your tooling. +When pushing the operations, you will see a short summary indicating how many were created and how many were already registered. Use `--format json` for machine-readable output: ```bash -wgc operations push mygraph -n default -c my-client -f persisted-query-manifest.json --format json (11-25 10:23) -{ - "2d9df67f96ce804da7a9107d33373132a53bf56aec29ef4b4e06569a43a16935": { - "body": "query Employees {\n employees {\n id\n role {\n department\n title\n __typename\n }\n details {\n forename\n surname\n location\n __typename\n }\n __typename\n }\n}", - "status": "up_to_date" - }, -... 
-} +wgc operations push mygraph -n default -c my-client -f manifest.json --format json ``` -Finally, you should enable persisted operations in your GraphQL client library. - To see all available options for `wgc operations push`, see [Push](/cli/operations/push). Additionally, check the [Using Persisted Operation with Federated GraphQL](/tutorial/using-persisted-operations) tutorial for a step-by-step guide. @@ -69,6 +116,26 @@ Operations are currently deleted one at a time from the UI. Studio always asks for confirmation before deleting operations. If traffic is detected for the selected operation, the dialog warns that the operation is receiving traffic. If analytics data is unavailable, Studio cannot guarantee that existing clients won't break. You can always check the metrics using the link in the dialog. +## PQL Manifest + +By default, the router fetches persisted operations individually from the Cosmo CDN on each request. When the **PQL manifest** is enabled, the router instead loads all persisted operations from a single `manifest.json` file at startup and serves them entirely from memory — eliminating per-request network overhead. + +```yaml +persisted_operations: + manifest: + enabled: true +``` + +The manifest is automatically updated in the Cosmo CDN whenever operations are added or deleted via `wgc operations push` or Studio. The router polls for updates using `poll_interval` and `poll_jitter`, picking up changes without requiring a restart. You can also load the manifest from a custom [storage provider](#using-a-custom-storage-provider). + + + When the manifest is enabled, it is **authoritative** — the router does not fall back to fetching individual operations from the CDN. Unknown operation hashes are rejected immediately. + + +## Using a custom storage provider + +You can load persisted operations from your own S3-compatible storage instead of the Cosmo CDN. 
First, [define a storage provider](/router/storage-providers), then reference it in your persisted operations configuration. + ## Disallowing non-persisted Operations If you're going all in on Security, you'd want to only allow Persisted Operations in your Production Environment. diff --git a/docs-website/router/security/hardening-guide.mdx b/docs-website/router/security/hardening-guide.mdx index eca7b0ab72..3699bdd449 100644 --- a/docs-website/router/security/hardening-guide.mdx +++ b/docs-website/router/security/hardening-guide.mdx @@ -127,10 +127,13 @@ By default the subgraph routing URL from the [wgc subgraph create](/cli/subgraph [Persistent operations](/router/persisted-queries/persisted-operations) are a great way to save bandwidth but also to reduce the attack vectors by only allowing known queries to be executed. -By default, both persistent queries and regular queries are allowed. To allow only persistent queries, the following configuration should be applied: +We recommend enabling the [PQL manifest](/router/persisted-queries/persisted-operations#pql-manifest) to load all persisted operations at startup and serve them from memory. This eliminates per-request network calls and ensures the router has a complete, authoritative view of all allowed operations. ```yaml router.yaml + persisted_operations: + manifest: + enabled: true security: block_non_persisted_operations: enabled: true diff --git a/docs-website/router/storage-providers.mdx b/docs-website/router/storage-providers.mdx index 43fc7421ee..10f21362cc 100644 --- a/docs-website/router/storage-providers.mdx +++ b/docs-website/router/storage-providers.mdx @@ -5,9 +5,7 @@ description: "To maintain control over your data and ensure high performance, ut --- -The router is a stateless component, allowing it to be scaled horizontally without requiring persistence. However, it needs to fetch the execution configuration from a storage source at startup and when new schema updates are published. 
This execution config provides the instructions for executing and planning GraphQL operations. - -The same approach applies to persisted operations (PO), which manage an allowlist of permitted GraphQL operations that can execute against the router. The router must download these operations once before they can be executed. +The router is a stateless component, allowing it to be scaled horizontally without requiring persistence. However, it needs to fetch artifacts such as the execution configuration and persisted operations from a storage source. By default, the router fetches these from the Cosmo CDN. You can configure custom storage providers to use your own infrastructure instead. For both mechanisms, different storage providers can be used: @@ -15,18 +13,14 @@ For both mechanisms, different storage providers can be used: * **Amazon S3**: An object storage protocol. We support any S3 compatible object-storage e.g. Minio and AWS. - - By removing the dependency on Cosmo Cloud, your router no longer relies on us for operations while still benefiting from all its features. - - -When using a custom storage provider, you are responsible for manually pushing those artifacts as part of your CI process. In the next section, we will explain how to do this: +When using a custom storage provider, you are responsible for manually pushing those artifacts as part of your CI process. ## Define a provider -Before you can use a storage provider, you have to define it in the [`storage_providers`](/router/storage-providers) section of your `config.yaml` file. +Before you can use a storage provider, you have to define it in the `storage_providers` section of your `config.yaml` file. Each provider is given an `id` that you reference from other configuration sections. 
- ```bash config.yaml + ```yaml config.yaml version: 1 storage_providers: s3: @@ -47,7 +41,7 @@ Before you can use a storage provider, you have to define it in the [`storage_pr If you are using EC2 or EKS on AWS and have configured [`node IAM roles`](https://docs.aws.amazon.com/eks/latest/userguide/create-node-role.html) you don't need to provide an `access_key` or `secret_key` and the S3 client will handle this on your behalf. - ```bash config.yaml +```yaml config.yaml version: 1 storage_providers: s3: @@ -59,103 +53,13 @@ storage_providers: ``` -## Execution config - -After configuring the storage provider, you can use it by referencing it in the `provider_id` field. - - - ```bash config.yaml - version: 1 - execution_config: - storage: - provider_id: s3 - object_path: "router.json" - ``` - - -The `object_path` field points to the file in your bucket that is updated after each schema deployment in your CI/CD pipeline. The following snippet illustrates how it could look like: - -```bash -# Publish your subgraph -wgc subgraph publish my-subgraph --schema ./schema.graphqls -# Download the latest execution config after successful composition -wgc router fetch mygraph -o router.json -# Upload the execution config to your S3 storage -aws s3 cp router.json s3://cosmo/ -``` +## Using storage providers -First, we publish our schema changes to Cosmo. After that, we will download the latest execution config and upload it to your own S3 provider. The router will check for updates every 10 seconds (default) and hot-reload the router accordingly without impacting current user traffic. +Once a provider is defined, you reference it by `provider_id` in the configuration of each feature. The following features support custom storage providers: -### Fallback storage +* [**Execution config**](/router/configuration#execution-config-options) — load the router execution configuration from S3 instead of the Cosmo CDN. 
+* [**Persisted operations**](/router/persisted-queries/persisted-operations#using-a-custom-storage-provider) — load individual persisted operations or the PQL manifest from S3. -You can configure a fallback storage for fetching the execution config in the event the router cannot reach the primary storage. If enabled, it will default to using the Cosmo CDN but you can specify the provider as well. You cannot use the same provider for both primary and fallback storage. - - - ```yaml config.yaml - version: 1 - execution_config: - storage: - provider_id: s3 - object_path: "router.json" - fallback_storage: - enabled: true - provider_id: minio - object_path: "router.json" - ``` - - -### Best Practices +## Best Practices * Create different S3 credentials for READ and WRITE to reduce the attack surface. - -## Persisted Operations - -After configuring the storage provider, you can use it by referencing it in the `provider_id` field. - - - ```yaml config.yaml - version: 1 - persisted_operations: - cache: - size: 100MB - storage: - provider_id: s3 - object_prefix: "prod/operations" - ``` - - -The `object_prefix` field points to the location in the bucket where the persisted operations are uploaded. Each persisted operation needs to follow the naming convention: **SHA256** of the file + `.json` as filename extension. A persisted operation is a JSON document and must follow the following structure: - -* `version`: The version of the persisted operation format. - -* `body`: The actual content of the persistent operation. 
- -### Example - -Given the following persisted operation: - - - ```json c3ab8ff13720e8ad9047dd39466b3c8974e592c2fa383d4a3960714caef0c4f2.json - {"version":1,"body":"{\n employees {\n id\n details {\n forename\n }\n }\n}"} - ``` - - -Upload the file to the bucket location as follows: - -```bash -# Upload the persisted operation to S3 -aws s3 cp c3ab8ff13720e8ad9047dd39466b3c8974e592c2fa383d4a3960714caef0c4f2.json s3://cosmo/prod/operations/ -``` - -Now, you can make a persisted operation (PO) request against the router, and it will fetch the operation from your S3 and execute it. Subsequent requests are cached and won't add additional latency. - -```bash -curl 'http://localhost:3002/graphql' \ - -H 'graphql-client-name: test' \ - -H 'Content-Type: application/json' \ - -d '{"extensions":{"persistedQuery":{"version":1,"sha256Hash":"c3ab8ff13720e8ad9047dd39466b3c8974e592c2fa383d4a3960714caef0c4f2"}}}' -``` - -### Best Practices - -* We can cache persisted operations effectively only by using an immutable filename, such as the SHA-256 hash of the file. 
diff --git a/router-tests/pql_manifest_test.go b/router-tests/operations/pql_manifest_test.go similarity index 78% rename from router-tests/pql_manifest_test.go rename to router-tests/operations/pql_manifest_test.go index cdc1da71c0..706607e2b5 100644 --- a/router-tests/pql_manifest_test.go +++ b/router-tests/operations/pql_manifest_test.go @@ -119,6 +119,36 @@ func TestPQLManifest(t *testing.T) { }) }) + t.Run("defaults to Cosmo CDN when no storage provider configured", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(manifestConfig), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + + hasManifestRequest := false + for _, req := range getCDNRequests(t, xEnv.CDN.URL) { + if strings.Contains(req, "/operations/manifest.json") { + hasManifestRequest = true + } + require.False(t, strings.Contains(req, "/operations/my-client/"), + "expected no individual operation CDN requests, but got: %s", req) + } + require.True(t, hasManifestRequest, "CDN should be called for manifest when no storage provider is configured") + }) + }) + t.Run("safelist with manifest allows known queries", func(t *testing.T) { t.Parallel() testenv.Run(t, &testenv.Config{ @@ -207,6 +237,39 @@ func TestPQLManifest(t *testing.T) { }) }) + t.Run("log_unknown with manifest returns not found for hash-only request", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + 
Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + LogUnknown: true, + }), + }, + LogObservation: testenv.LogObservationConfig{ + Enabled: true, + LogLevel: zapcore.WarnLevel, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + // Hash-only request with no query body — should return PersistedQueryNotFound, not "empty request body" + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "0000000000000000000000000000000000000000000000000000000000000000"}}`), + Header: header, + }) + require.Equal(t, persistedNotFoundResp, res.Body) + + logEntries := xEnv.Observer().FilterMessageSnippet("Unknown persisted operation found").All() + require.Len(t, logEntries, 1) + }) + }) + t.Run("without manifest CDN is used for individual operations", func(t *testing.T) { t.Parallel() testenv.Run(t, &testenv.Config{}, func(t *testing.T, xEnv *testenv.Environment) { @@ -463,4 +526,63 @@ func TestPQLManifest(t *testing.T) { }, 5*time.Second, 100*time.Millisecond) }) }) + + t.Run("disabled persisted operations suppresses manifest", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Disabled: true, + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // With persisted operations disabled, manifest should not load. + // A regular query should still work. 
+ res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + Query: "query { employees { id } }", + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + + // No manifest requests should be made to CDN + hasManifestRequest := false + for _, req := range getCDNRequests(t, xEnv.CDN.URL) { + if strings.Contains(req, "/operations/manifest.json") { + hasManifestRequest = true + } + } + require.False(t, hasManifestRequest, "CDN should not fetch manifest when persisted operations are disabled") + }) + }) + + t.Run("filesystem provider rejected for manifest", func(t *testing.T) { + t.Parallel() + testenv.FailsOnStartup(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + Storage: config.PersistedOperationsStorageConfig{ + ProviderID: "local", + }, + }), + core.WithStorageProviders(config.StorageProviders{ + FileSystem: []config.FileSystemStorageProvider{ + {ID: "local", Path: "."}, + }, + }), + }, + }, func(t *testing.T, err error) { + require.ErrorContains(t, err, "filesystem storage provider") + require.ErrorContains(t, err, "not supported for PQL manifest") + }) + }) } diff --git a/router/core/graphql_prehandler.go b/router/core/graphql_prehandler.go index d8535e4a98..122f848cd0 100644 --- a/router/core/graphql_prehandler.go +++ b/router/core/graphql_prehandler.go @@ -629,11 +629,16 @@ func (h *PreHandler) handleOperation(req *http.Request, httpOperation *httpOpera var poNotFoundErr *persistedoperation.PersistentOperationNotFoundError if h.operationBlocker.logUnknownOperationsEnabled && errors.As(err, &poNotFoundErr) { requestContext.logger.Warn("Unknown persisted operation found", zap.String("query", operationKit.parsedOperation.Request.Query), zap.String("sha256Hash", poNotFoundErr.Sha256Hash)) - if h.operationBlocker.safelistEnabled { - 
span.End() - return err + // When log_unknown is enabled, ad-hoc queries whose hash doesn't match a + // persisted operation are logged above. We only allow execution to continue + // when the request includes a query body (the ad-hoc query to run) and + // safelist is not enforced. Hash-only requests without a body have nothing + // to execute, so we always return the not-found error in that case. + if !h.operationBlocker.safelistEnabled && operationKit.parsedOperation.Request.Query != "" { + err = nil } - } else { + } + if err != nil { span.End() return err } diff --git a/router/core/router.go b/router/core/router.go index 9bcc56734f..608057a813 100644 --- a/router/core/router.go +++ b/router/core/router.go @@ -528,7 +528,6 @@ func NewRouter(opts ...Option) (*Router, error) { r.engineExecutionConfiguration.Debug.EnableCacheResponseHeaders = true } - if r.securityConfiguration.DepthLimit != nil { r.logger.Warn("The security configuration field 'depth_limit' is deprecated, and will be removed. Use 'security.complexity_limits.depth' instead.") @@ -1155,6 +1154,9 @@ func (r *Router) buildClients(ctx context.Context) error { fileSystemProviders[provider.ID] = provider } + // Create the storage client for persisted operations based on the configured provider. + // The same client is reused for manifest fetching when the manifest feature is enabled, + // since both features are exclusive (manifest replaces individual operation fetches). 
var pClient persistedoperation.StorageClient if !r.persistedOperationsConfig.Disabled { @@ -1167,7 +1169,7 @@ func (r *Router) buildClients(ctx context.Context) error { Logger: r.logger, }) if err != nil { - return err + return fmt.Errorf("failed to create CDN client: %w", err) } pClient = c @@ -1186,7 +1188,7 @@ func (r *Router) buildClients(ctx context.Context) error { TraceProvider: r.tracerProvider, }) if err != nil { - return err + return fmt.Errorf("failed to create S3 client: %w", err) } pClient = c @@ -1198,7 +1200,7 @@ func (r *Router) buildClients(ctx context.Context) error { ObjectPathPrefix: r.persistedOperationsConfig.Storage.ObjectPrefix, }) if err != nil { - return err + return fmt.Errorf("failed to create filesystem client: %w", err) } pClient = c @@ -1214,7 +1216,7 @@ func (r *Router) buildClients(ctx context.Context) error { Logger: r.logger, }) if err != nil { - return err + return fmt.Errorf("failed to create CDN client: %w", err) } pClient = c @@ -1255,16 +1257,40 @@ func (r *Router) buildClients(ctx context.Context) error { var pqlStore *pqlmanifest.Store - if r.persistedOperationsConfig.Manifest.Enabled { + if r.persistedOperationsConfig.Manifest.Enabled && !r.persistedOperationsConfig.Disabled { + const manifestFileName = "manifest.json" + pqlStore = pqlmanifest.NewStore(r.logger) - manifestPath := r.persistedOperationsConfig.Manifest.Path - if manifestPath != "" { - if err := r.loadPQLManifestFromStorage(ctx, pqlStore, manifestPath, fileSystemProviders, s3Providers, cdnProviders); err != nil { - return err + storageProviderID := r.persistedOperationsConfig.Storage.ProviderID + + if _, ok := fileSystemProviders[storageProviderID]; ok { + return fmt.Errorf("filesystem storage provider %q is not supported for PQL manifest; use S3 or CDN instead", storageProviderID) + } + + if storageProviderID != "" { + // An explicit storage provider is configured — use the already-created client to fetch the manifest once at startup. 
+ objectPrefix := r.persistedOperationsConfig.Storage.ObjectPrefix + objectPath := manifestFileName + if objectPrefix != "" { + objectPath = path.Join(objectPrefix, manifestFileName) } + + data, err := pClient.FetchManifest(ctx, objectPath) + if err != nil { + return fmt.Errorf("failed to fetch PQL manifest from storage provider %q: %w", + r.persistedOperationsConfig.Storage.ProviderID, err) + } + if err := pqlStore.LoadFromData(data); err != nil { + return fmt.Errorf("failed to parse PQL manifest from storage provider %q: %w", + r.persistedOperationsConfig.Storage.ProviderID, err) + } + r.logger.Info("Loaded PQL manifest from storage provider", + zap.String("provider_id", r.persistedOperationsConfig.Storage.ProviderID), + zap.Int("operations", pqlStore.OperationCount()), + ) } else { - // No path set — fetch manifest from CDN and poll for updates + // No storage provider configured — fetch manifest from Cosmo CDN and poll for updates. if r.graphApiToken == "" { return errors.New("graph token is required for PQL manifest") } @@ -1288,7 +1314,7 @@ func (r *Router) buildClients(ctx context.Context) error { go poller.Poll(ctx) } - // When manifest is enabled, do not use CDN fetches for individual operations + // Manifest is authoritative — individual operation fetches are not needed. pClient = nil } @@ -1324,99 +1350,6 @@ func (r *Router) buildClients(ctx context.Context) error { return nil } -// loadPQLManifestFromStorage loads a PQL manifest from the configured storage provider. 
-func (r *Router) loadPQLManifestFromStorage( - ctx context.Context, - pqlStore *pqlmanifest.Store, - manifestPath string, - fileSystemProviders map[string]config.FileSystemStorageProvider, - s3Providers map[string]config.S3StorageProvider, - cdnProviders map[string]config.CDNStorageProvider, -) error { - storageProviderID := r.persistedOperationsConfig.Storage.ProviderID - objectPrefix := r.persistedOperationsConfig.Storage.ObjectPrefix - - resolveObjectPath := func(p string) string { - if objectPrefix != "" { - return path.Join(objectPrefix, p) - } - return p - } - - var providerType string - - if provider, ok := fileSystemProviders[storageProviderID]; ok { - providerType = "filesystem" - fsClient, err := fs.NewClient(provider.Path, &fs.Options{ - ObjectPathPrefix: r.persistedOperationsConfig.Storage.ObjectPrefix, - }) - if err != nil { - return fmt.Errorf("failed to create filesystem client for PQL manifest: %w", err) - } - data, err := fsClient.FetchManifest(manifestPath) - if err != nil { - return fmt.Errorf("failed to load PQL manifest from filesystem provider %q: %w", storageProviderID, err) - } - if err := pqlStore.LoadFromData(data); err != nil { - return fmt.Errorf("failed to parse PQL manifest from filesystem provider %q: %w", storageProviderID, err) - } - } else if provider, ok := s3Providers[storageProviderID]; ok { - providerType = "s3" - s3Client, err := s3.NewClient(provider.Endpoint, &s3.Options{ - AccessKeyID: provider.AccessKey, - SecretAccessKey: provider.SecretKey, - Region: provider.Region, - UseSSL: provider.Secure, - BucketName: provider.Bucket, - ObjectPathPrefix: r.persistedOperationsConfig.Storage.ObjectPrefix, - TraceProvider: r.tracerProvider, - }) - if err != nil { - return fmt.Errorf("failed to create S3 client for PQL manifest: %w", err) - } - data, err := s3Client.FetchManifest(ctx, resolveObjectPath(manifestPath)) - if err != nil { - return fmt.Errorf("failed to load PQL manifest from S3 provider %q: %w", storageProviderID, err) - 
} - if err := pqlStore.LoadFromData(data); err != nil { - return fmt.Errorf("failed to parse PQL manifest from S3 provider %q: %w", storageProviderID, err) - } - } else if provider, ok := cdnProviders[storageProviderID]; ok { - providerType = "cdn" - if r.graphApiToken == "" { - return errors.New("graph token is required to fetch PQL manifest from CDN") - } - cdnClient, err := cdn.NewClient(provider.URL, r.graphApiToken, cdn.Options{ - Logger: r.logger, - }) - if err != nil { - return fmt.Errorf("failed to create CDN client for PQL manifest: %w", err) - } - data, err := cdnClient.FetchManifest(ctx, resolveObjectPath(manifestPath)) - if err != nil { - return fmt.Errorf("failed to load PQL manifest from CDN provider %q: %w", storageProviderID, err) - } - if err := pqlStore.LoadFromData(data); err != nil { - return fmt.Errorf("failed to parse PQL manifest from CDN provider %q: %w", storageProviderID, err) - } - } else if storageProviderID == "" { - providerType = "file" - if err := pqlStore.LoadFromFile(manifestPath); err != nil { - return fmt.Errorf("failed to load PQL manifest from file %q: %w", manifestPath, err) - } - } else { - return fmt.Errorf("unknown storage provider id %q for PQL manifest", storageProviderID) - } - - r.logger.Info("Loaded PQL manifest", - zap.String("source", providerType), - zap.String("provider_id", storageProviderID), - zap.Int("operations", pqlStore.OperationCount()), - ) - - return nil -} - // Start starts the router. It does block until the router has been initialized. After that the server is listening // on a separate goroutine. The server can be shutdown with Router.Shutdown(). Not safe for concurrent use. 
// During initialization, the router will register itself with the control plane and poll the config from the CDN diff --git a/router/internal/persistedoperation/README.md b/router/internal/persistedoperation/README.md index d3ca60894d..d780333150 100644 --- a/router/internal/persistedoperation/README.md +++ b/router/internal/persistedoperation/README.md @@ -7,8 +7,8 @@ Persisted operations are stored queries, which can be executed just by providing Specifically for those purposes, we enable three different methods of storing persisted operations: -1. **Persisted Operation Files** - This operation, documented [here](https://cosmo-docs.wundergraph.com/router/persisted-operations), allows users to store persisted operations in files in a CDN/S3 bucket, which are then loaded by the router individually per request. This is both useful for storing large queries, as well as by reducing the router's attack surface by only allowing registered operations. -2. **PQL Manifest** - When enabled, the router loads a single JSON manifest containing all persisted operations at startup. The manifest can be loaded from any configured storage provider (S3, CDN, or filesystem) via the `storage_providers` config, or from the Cosmo CDN by default. When no explicit `path` is set, the router fetches from the Cosmo CDN and polls for updates periodically. When a `path` is set with a storage provider, the manifest is loaded once at startup. Operations are resolved entirely in-memory with zero per-request network overhead. When the manifest is enabled, it is authoritative — no CDN fallback occurs for individual operations. See the `pqlmanifest` subpackage. +1. **Persisted Operation Files** - This operation, documented [here](https://cosmo-docs.wundergraph.com/router/persisted-queries/persisted-operations), allows users to store persisted operations in files in a CDN/S3 bucket, which are then loaded by the router individually per request. 
This is useful for storing large queries and for only allowing registered operations. +2. **PQL Manifest (Recommended)** - When enabled, the router loads a single JSON manifest (`manifest.json`) containing all persisted operations at startup. The manifest uses the same `storage` config as persisted operations (both features are exclusive). When a `storage.provider_id` is configured, the manifest is loaded from that provider (S3, CDN, or filesystem) at startup (the file is resolved as `/manifest.json`). When no storage provider is configured, the router fetches from the Cosmo CDN and polls for updates periodically. Operations are resolved entirely in-memory with zero per-request network overhead. When the manifest is enabled, it is authoritative — no fallback occurs for individual operations. We suggest using the PQL Manifest as the preferred method for persisted operations. See the `pqlmanifest` subpackage. 3. **Automatic Persisted Queries** - This setting allows users to automatically cache queries that are sent, as long as they are sent together with their sha256hash. This is a useful performance optimizer, as it allows the router to cache queries that are frequently requested, without the need to manually store them in a file. These methods can exist in concert — for example, users can enable the PQL manifest for zero-latency lookups and use APQ to cache ad-hoc queries. @@ -27,7 +27,7 @@ When a persisted operation request arrives, the router resolves it in this order > **Hash validation prerequisite:** When a request includes both a query body and `extensions.persistedQuery.sha256Hash`, the router validates the body against the hash and rejects the request if they do not match — _before_ any APQ or persisted-operation lookup occurs. See `router/core/graphql_prehandler.go` (`handleOperation`). 1. **Persisted Operations (CDN), no APQ** → The router fetches individual operations from CDN/S3 on demand. If a query is not found, the router returns an error. 
After the query is planned, the router caches the normalized query in the local persisted operation cache. -1. **PQL Manifest, no APQ** → The router loads the manifest at startup from the configured storage provider (S3, CDN, filesystem) or the Cosmo CDN. When using the Cosmo CDN, it also polls for updates. When using a custom storage provider with an explicit path, the manifest is loaded once at startup. All lookups are in-memory. Unknown hashes are rejected immediately without any network call. +1. **PQL Manifest, no APQ** → The router loads the manifest (`manifest.json`) at startup from the configured storage provider (S3, CDN, or filesystem). When no storage provider is configured, the router fetches from the Cosmo CDN and polls for updates. When a storage provider is configured, the manifest is loaded once at startup. All lookups are in-memory. Unknown hashes are rejected immediately without any network call. 1. **APQ, No Persisted Operations** → If a `persisted_operation` request is sent, the router checks the APQ cache first. If not found, it checks if a query body was sent with the request. If so, it validates the hash against the body, then executes and caches it. Otherwise, the router returns an error. 1. **No APQ, No Persisted Operations** → If a persisted operation is sent, the router returns an error, as there are no persisted operations stored. Even if a query is sent, the router will still error because APQ isn't enabled. 1. **APQ and Persisted Operations** → The router validates any included query body against the hash, then checks APQ first, then the PQL manifest or CDN (depending on config), then checks if a query body was attached. First match wins. 
diff --git a/router/internal/persistedoperation/client.go b/router/internal/persistedoperation/client.go index 9b7dd82345..29c05dc1d0 100644 --- a/router/internal/persistedoperation/client.go +++ b/router/internal/persistedoperation/client.go @@ -27,6 +27,7 @@ func (e PersistentOperationNotFoundError) Error() string { type StorageClient interface { PersistedOperation(ctx context.Context, clientName string, sha256Hash string) ([]byte, error) + FetchManifest(ctx context.Context, objectPath string) ([]byte, error) Close() } diff --git a/router/internal/persistedoperation/operationstorage/cdn/client.go b/router/internal/persistedoperation/operationstorage/cdn/client.go index cdc0a548a6..d422ab89af 100644 --- a/router/internal/persistedoperation/operationstorage/cdn/client.go +++ b/router/internal/persistedoperation/operationstorage/cdn/client.go @@ -24,12 +24,12 @@ type Options struct { Logger *zap.Logger } -// Deprecated: The CDN-based persisted operation client is deprecated. +// Deprecated: The CDN-based persisted operation Client is deprecated. // The router now downloads all operations at once via the PQL manifest, avoiding -// per-request CDN latency. This client is kept for backward compatibility. -var _ persistedoperation.StorageClient = (*client)(nil) +// per-request CDN latency. This Client is kept for backward compatibility. +var _ persistedoperation.StorageClient = (*Client)(nil) -type client struct { +type Client struct { cdnURL *url.URL authenticationToken string // federatedGraphID is the ID of the federated graph that was obtained @@ -42,7 +42,39 @@ type client struct { logger *zap.Logger } -func (cdn *client) PersistedOperation(ctx context.Context, clientName string, sha256Hash string) ([]byte, error) { +// NewClient creates a new CDN Client. URL is the URL of the CDN. 
+// Token is the token used to authenticate with the CDN, the same as the GRAPH_API_TOKEN +func NewClient(endpoint string, token string, opts Options) (*Client, error) { + u, err := url.Parse(endpoint) + if err != nil { + return nil, fmt.Errorf("invalid CDN URL %q: %w", endpoint, err) + } + + if opts.Logger == nil { + opts.Logger = zap.NewNop() + } + + claims, err := jwt.ExtractFederatedGraphTokenClaims(token) + if err != nil { + return nil, err + } + + logger := opts.Logger.With( + zap.String("component", "persisted_operations_client"), + zap.String("url", endpoint), + ) + + return &Client{ + cdnURL: u, + authenticationToken: token, + federatedGraphID: url.PathEscape(claims.FederatedGraphID), + organizationID: url.PathEscape(claims.OrganizationID), + httpClient: httpclient.NewRetryableHTTPClient(logger), + logger: logger, + }, nil +} + +func (cdn *Client) PersistedOperation(ctx context.Context, clientName string, sha256Hash string) ([]byte, error) { content, err := cdn.persistedOperation(ctx, clientName, sha256Hash) if err != nil { return nil, err @@ -51,7 +83,7 @@ func (cdn *client) PersistedOperation(ctx context.Context, clientName string, sh return content, nil } -func (cdn *client) persistedOperation(ctx context.Context, clientName string, sha256Hash string) ([]byte, error) { +func (cdn *Client) persistedOperation(ctx context.Context, clientName string, sha256Hash string) ([]byte, error) { span := trace.SpanFromContext(ctx) @@ -73,9 +105,7 @@ func (cdn *client) persistedOperation(ctx context.Context, clientName string, sh semconv12.HTTPHostKey.String(req.Host), ) - req.Header.Set("Content-Type", "application/json; charset=UTF-8") - req.Header.Add("Authorization", "Bearer "+cdn.authenticationToken) - req.Header.Set("Accept-Encoding", "gzip") + cdn.setCDNHeaders(req) resp, err := cdn.httpClient.Do(req) if err != nil { @@ -105,18 +135,11 @@ func (cdn *client) persistedOperation(ctx context.Context, clientName string, sh return nil, fmt.Errorf("unexpected status 
code when loading persisted operation, statusCode: %d", resp.StatusCode) } - var reader io.Reader = resp.Body - - if resp.Header.Get("Content-Encoding") == "gzip" { - r, err := gzip.NewReader(resp.Body) - if err != nil { - return nil, errors.New("could not create gzip reader. " + err.Error()) - } - defer func() { - _ = r.Close() - }() - reader = r + reader, cleanup, err := gzipAwareReader(resp) + if err != nil { + return nil, err } + defer cleanup() body, err := io.ReadAll(reader) if err != nil { @@ -132,50 +155,16 @@ func (cdn *client) persistedOperation(ctx context.Context, clientName string, sh return []byte(po.Body), nil } -// NewClient creates a new CDN client. URL is the URL of the CDN. -// Token is the token used to authenticate with the CDN, the same as the GRAPH_API_TOKEN -func NewClient(endpoint string, token string, opts Options) (*client, error) { - u, err := url.Parse(endpoint) - if err != nil { - return nil, fmt.Errorf("invalid CDN URL %q: %w", endpoint, err) - } - - if opts.Logger == nil { - opts.Logger = zap.NewNop() - } - - claims, err := jwt.ExtractFederatedGraphTokenClaims(token) - if err != nil { - return nil, err - } - - logger := opts.Logger.With( - zap.String("component", "persisted_operations_client"), - zap.String("url", endpoint), - ) - - return &client{ - cdnURL: u, - authenticationToken: token, - federatedGraphID: url.PathEscape(claims.FederatedGraphID), - organizationID: url.PathEscape(claims.OrganizationID), - httpClient: httpclient.NewRetryableHTTPClient(logger), - logger: logger, - }, nil -} - // FetchManifest fetches a PQL manifest from the CDN at the given path and returns the raw bytes. 
-func (cdn *client) FetchManifest(ctx context.Context, manifestPath string) ([]byte, error) { +func (cdn *Client) FetchManifest(ctx context.Context, manifestPath string) ([]byte, error) { manifestURL := cdn.cdnURL.ResolveReference(&url.URL{Path: manifestPath}) - req, err := http.NewRequestWithContext(ctx, "GET", manifestURL.String(), nil) + req, err := http.NewRequestWithContext(ctx, "POST", manifestURL.String(), nil) if err != nil { return nil, err } - req.Header.Set("Content-Type", "application/json; charset=UTF-8") - req.Header.Add("Authorization", "Bearer "+cdn.authenticationToken) - req.Header.Set("Accept-Encoding", "gzip") + cdn.setCDNHeaders(req) resp, err := cdn.httpClient.Do(req) if err != nil { @@ -186,23 +175,36 @@ func (cdn *client) FetchManifest(ctx context.Context, manifestPath string) ([]by }() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("CDN returned status %d when fetching manifest", resp.StatusCode) + return nil, fmt.Errorf("CDN returned status %d when fetching persistent operation manifest", resp.StatusCode) } - var reader io.Reader = resp.Body + reader, cleanup, err := gzipAwareReader(resp) + if err != nil { + return nil, err + } + defer cleanup() + return io.ReadAll(reader) +} + +// setCDNHeaders sets the common headers for CDN requests. +func (cdn *Client) setCDNHeaders(req *http.Request) { + req.Header.Set("Content-Type", "application/json; charset=UTF-8") + req.Header.Add("Authorization", "Bearer "+cdn.authenticationToken) + req.Header.Set("Accept-Encoding", "gzip") +} + +// gzipAwareReader returns a reader that transparently decompresses the response body +// if the response is gzip-encoded, along with a cleanup function that must be deferred. 
+func gzipAwareReader(resp *http.Response) (io.Reader, func(), error) { if resp.Header.Get("Content-Encoding") == "gzip" { r, err := gzip.NewReader(resp.Body) if err != nil { - return nil, fmt.Errorf("could not create gzip reader: %w", err) + return nil, nil, fmt.Errorf("could not create gzip reader: %w", err) } - defer func() { - _ = r.Close() - }() - reader = r + return r, func() { _ = r.Close() }, nil } - - return io.ReadAll(reader) + return resp.Body, func() {}, nil } -func (cdn *client) Close() {} +func (cdn *Client) Close() {} diff --git a/router/internal/persistedoperation/operationstorage/fs/client.go b/router/internal/persistedoperation/operationstorage/fs/client.go index d0884b8b44..a99dfef916 100644 --- a/router/internal/persistedoperation/operationstorage/fs/client.go +++ b/router/internal/persistedoperation/operationstorage/fs/client.go @@ -70,7 +70,7 @@ func (c client) persistedOperation(clientName string, sha256Hash string) ([]byte } // FetchManifest reads a PQL manifest from the filesystem at the given path and returns the raw bytes. 
-func (c client) FetchManifest(manifestPath string) ([]byte, error) { +func (c client) FetchManifest(_ context.Context, manifestPath string) ([]byte, error) { fullPath := filepath.Join(c.path, c.options.ObjectPathPrefix, manifestPath) return os.ReadFile(fullPath) } diff --git a/router/pkg/config/config.go b/router/pkg/config/config.go index 806a3b2710..25944ee20b 100644 --- a/router/pkg/config/config.go +++ b/router/pkg/config/config.go @@ -974,7 +974,6 @@ type AutomaticPersistedQueriesCacheConfig struct { type PQLManifestConfig struct { Enabled bool `yaml:"enabled" envDefault:"false" env:"ENABLED"` - Path string `yaml:"path" env:"PATH"` PollInterval time.Duration `yaml:"poll_interval" envDefault:"10s" env:"POLL_INTERVAL"` PollJitter time.Duration `yaml:"poll_jitter" envDefault:"5s" env:"POLL_JITTER"` } diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index 3e408b49bb..c67a6c7700 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -181,7 +181,7 @@ }, "storage": { "description": "The storage provider for persisted operation. Only one provider can be active. When no provider is specified, the router will fallback to the Cosmo CDN provider to download the persisted operations.", - "required": ["provider_id", "object_prefix"], + "required": ["provider_id"], "properties": { "provider_id": { "description": "The ID of the storage provider. The ID must match the ID of the storage provider in the storage_providers section.", @@ -203,10 +203,6 @@ "description": "Enable the PQL manifest feature.", "default": false }, - "path": { - "type": "string", - "description": "The path to the PQL manifest JSON file." 
- }, "poll_interval": { "type": "string", "format": "go-duration", diff --git a/router/pkg/config/config_test.go b/router/pkg/config/config_test.go index 8aa1d95787..678324a0ef 100644 --- a/router/pkg/config/config_test.go +++ b/router/pkg/config/config_test.go @@ -505,7 +505,7 @@ persisted_operations: require.NoError(t, err, &js) } -func TestInvalidPersistedOperations(t *testing.T) { +func TestPersistedOperationsStorageWithoutObjectPrefix(t *testing.T) { t.Parallel() f := createTempFileFromFixture(t, ` @@ -525,12 +525,9 @@ persisted_operations: size: 100MB storage: provider_id: s3 - # Missing object_prefix `) _, err := LoadConfig([]string{f}) - var js *jsonschema.ValidationError - require.ErrorAs(t, err, &js) - require.Equal(t, "at '/persisted_operations/storage': missing property 'object_prefix'", js.Causes[0].Error()) + require.NoError(t, err) } func TestValidExecutionConfig(t *testing.T) { diff --git a/router/pkg/config/testdata/config_defaults.json b/router/pkg/config/testdata/config_defaults.json index 70c409a33a..c970e1ec9b 100644 --- a/router/pkg/config/testdata/config_defaults.json +++ b/router/pkg/config/testdata/config_defaults.json @@ -545,7 +545,6 @@ }, "Manifest": { "Enabled": false, - "Path": "", "PollInterval": 10000000000, "PollJitter": 5000000000 } diff --git a/router/pkg/config/testdata/config_full.json b/router/pkg/config/testdata/config_full.json index ee05ac8750..31380c18fe 100644 --- a/router/pkg/config/testdata/config_full.json +++ b/router/pkg/config/testdata/config_full.json @@ -980,7 +980,6 @@ }, "Manifest": { "Enabled": true, - "Path": "", "PollInterval": 30000000000, "PollJitter": 10000000000 } From c9bcc317b54aeab4ff7a5ef9b356a78795e20817 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Thu, 26 Mar 2026 16:03:14 +0100 Subject: [PATCH 23/31] feat(pql): add test for CDN manifest fetch failure and improve error handling --- router-tests/operations/pql_manifest_test.go | 29 ++++++++++++++++++++ router/core/router.go | 2 +- 2 files changed, 
30 insertions(+), 1 deletion(-) diff --git a/router-tests/operations/pql_manifest_test.go b/router-tests/operations/pql_manifest_test.go index 706607e2b5..da4c463231 100644 --- a/router-tests/operations/pql_manifest_test.go +++ b/router-tests/operations/pql_manifest_test.go @@ -585,4 +585,33 @@ func TestPQLManifest(t *testing.T) { require.ErrorContains(t, err, "not supported for PQL manifest") }) }) + + t.Run("fails to start when initial CDN manifest fetch fails", func(t *testing.T) { + t.Parallel() + + cdnServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if strings.HasSuffix(r.URL.Path, "/operations/manifest.json") { + w.WriteHeader(http.StatusInternalServerError) + return + } + // Serve other CDN requests normally + w.WriteHeader(http.StatusNotFound) + })) + defer cdnServer.Close() + + testenv.FailsOnStartup(t, &testenv.Config{ + CdnSever: cdnServer, + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + }), + }, + }, func(t *testing.T, err error) { + require.ErrorContains(t, err, "failed to fetch initial PQL manifest") + }) + }) } diff --git a/router/core/router.go b/router/core/router.go index 608057a813..a34fc6c1d3 100644 --- a/router/core/router.go +++ b/router/core/router.go @@ -1308,7 +1308,7 @@ func (r *Router) buildClients(ctx context.Context) error { ) if err := poller.FetchInitial(ctx); err != nil { - r.logger.Warn("Failed to fetch initial PQL manifest, will retry on next poll", zap.Error(err)) + return fmt.Errorf("failed to fetch initial PQL manifest: %w", err) } go poller.Poll(ctx) From ffcbd35e299eb3565ef7b49fa21abc56018c6a94 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Thu, 26 Mar 2026 16:44:56 +0100 Subject: [PATCH 24/31] feat(pql): add test for CDN manifest fetch failure and improve error handling --- cdn-server/cdn/src/index.ts 
| 15 +- cdn-server/cdn/test/cdn.test.ts | 76 ++++--- router-tests/operations/pql_manifest_test.go | 13 +- .../persistedoperation/pqlmanifest/fetcher.go | 26 +-- .../pqlmanifest/fetcher_test.go | 204 ++++++++++++++++++ .../pqlmanifest/poller_test.go | 183 ++++++++++++++++ 6 files changed, 460 insertions(+), 57 deletions(-) create mode 100644 router/internal/persistedoperation/pqlmanifest/fetcher_test.go create mode 100644 router/internal/persistedoperation/pqlmanifest/poller_test.go diff --git a/cdn-server/cdn/src/index.ts b/cdn-server/cdn/src/index.ts index 8f9c59e81d..8fc2e5c17c 100644 --- a/cdn-server/cdn/src/index.ts +++ b/cdn-server/cdn/src/index.ts @@ -276,15 +276,14 @@ const persistedOperationsManifest = (storage: BlobStorage) => { const key = `${organizationId}/${federatedGraphId}/operations/manifest.json`; - const body = await c.req.json(); + const ifNoneMatch = c.req.header('If-None-Match'); + const clientVersion = ifNoneMatch?.replace(/^"(.*)"$/, '$1') || null; let isModified = true; - // Only check if revision is specified otherwise we assume the router - // starts for the first time, and we need to return the manifest anyway. 
- if (body?.revision) { + if (clientVersion) { try { - isModified = await storage.headObject({ context: c, key, version: body.revision }); + isModified = await storage.headObject({ context: c, key, version: clientVersion }); } catch (e: any) { if (e instanceof BlobNotFoundError) { return c.notFound(); @@ -294,6 +293,7 @@ const persistedOperationsManifest = (storage: BlobStorage) => { } if (!isModified) { + c.header('ETag', `"${clientVersion}"`); return c.body(null, 304); } @@ -308,6 +308,9 @@ const persistedOperationsManifest = (storage: BlobStorage) => { throw e; } + if (blobObject.metadata?.version) { + c.header('ETag', `"${blobObject.metadata.version}"`); + } c.header('Content-Type', 'application/json; charset=UTF-8'); return stream(c, async (stream) => { @@ -358,7 +361,7 @@ export const cdn = { test('it returns a 401 if no Authorization header is provided', async () => { const res = await app.request(requestPath, { - method: 'POST', - body: JSON.stringify({}), + method: 'GET', }); expect(res.status).toBe(401); }); test('it returns a 401 if an invalid Authorization header is provided', async () => { const res = await app.request(requestPath, { - method: 'POST', + method: 'GET', headers: { Authorization: `Bearer ${token.slice(0, -1)}}`, }, - body: JSON.stringify({}), }); expect(res.status).toBe(401); }); test('it returns a 400 if the graph or organization ids does not match with the JWT payload', async () => { const res = await app.request(`/foo/bar/operations/manifest.json`, { - method: 'POST', + method: 'GET', headers: { Authorization: `Bearer ${token}`, - 'Content-Type': 'application/json', }, - body: JSON.stringify({}), }); expect(res.status).toBe(400); }); @@ -609,17 +605,15 @@ describe('CDN handlers', () => { .setProtectedHeader({ alg: 'HS256' }) .sign(new TextEncoder().encode(secretKey)); const res = await app.request(requestPath, { - method: 'POST', + method: 'GET', headers: { Authorization: `Bearer ${token}`, - 'Content-Type': 'application/json', }, - 
body: JSON.stringify({}), }); expect(res.status).toBe(401); }); - test('it returns the manifest on first request without revision', async () => { + test('it returns the manifest with ETag on first request', async () => { const manifestContents = JSON.stringify({ version: 1, revision: 'abc123', @@ -635,36 +629,35 @@ describe('CDN handlers', () => { }); const res = await app.request(requestPath, { - method: 'POST', + method: 'GET', headers: { Authorization: `Bearer ${token}`, - 'Content-Type': 'application/json', }, - body: JSON.stringify({}), }); expect(res.status).toBe(200); expect(res.headers.get('Content-Type')).toBe('application/json; charset=UTF-8'); + expect(res.headers.get('ETag')).toBe('"abc123"'); expect(await res.text()).toBe(manifestContents); }); - test('it returns 304 when revision matches the current revision', async () => { + test('it returns 304 with ETag when If-None-Match matches', async () => { blobStorage.objects.set(`${organizationId}/${federatedGraphId}/operations/manifest.json`, { buffer: Buffer.from(JSON.stringify({ version: 1, revision: 'abc123', operations: {} })), metadata: { version: 'abc123' }, }); const res = await app.request(requestPath, { - method: 'POST', + method: 'GET', headers: { Authorization: `Bearer ${token}`, - 'Content-Type': 'application/json', + 'If-None-Match': '"abc123"', }, - body: JSON.stringify({ revision: 'abc123' }), }); expect(res.status).toBe(304); + expect(res.headers.get('ETag')).toBe('"abc123"'); }); - test('it returns 200 when revision does not match the current revision', async () => { + test('it returns 200 with new ETag when If-None-Match does not match', async () => { const manifestContents = JSON.stringify({ version: 1, revision: 'def456', @@ -680,17 +673,52 @@ describe('CDN handlers', () => { }); const res = await app.request(requestPath, { - method: 'POST', + method: 'GET', headers: { Authorization: `Bearer ${token}`, - 'Content-Type': 'application/json', + 'If-None-Match': '"old-revision"', }, - body: 
JSON.stringify({ revision: 'old-revision' }), }); expect(res.status).toBe(200); + expect(res.headers.get('ETag')).toBe('"def456"'); expect(await res.text()).toBe(manifestContents); }); + test('ETag round-trip: fetch returns ETag, re-fetch with that ETag returns 304', async () => { + const manifestContents = JSON.stringify({ + version: 1, + revision: 'rev-round-trip', + generatedAt: '2025-01-01T00:00:00.000Z', + operations: { hash1: 'query { hello }' }, + }); + + blobStorage.objects.set(`${organizationId}/${federatedGraphId}/operations/manifest.json`, { + buffer: Buffer.from(manifestContents), + metadata: { version: 'rev-round-trip' }, + }); + + // First request: no ETag, should get 200 with ETag + const res1 = await app.request(requestPath, { + method: 'GET', + headers: { Authorization: `Bearer ${token}` }, + }); + expect(res1.status).toBe(200); + const etag = res1.headers.get('ETag'); + expect(etag).toBe('"rev-round-trip"'); + expect(await res1.text()).toBe(manifestContents); + + // Second request: send ETag back as If-None-Match, should get 304 + const res2 = await app.request(requestPath, { + method: 'GET', + headers: { + Authorization: `Bearer ${token}`, + 'If-None-Match': etag!, + }, + }); + expect(res2.status).toBe(304); + expect(res2.headers.get('ETag')).toBe(etag); + }); + test('it returns a 404 if the manifest does not exist', async () => { const otherBlobStorage = new InMemoryBlobStorage(); const otherApp = new Hono(); @@ -702,12 +730,10 @@ describe('CDN handlers', () => { }); const res = await otherApp.request(requestPath, { - method: 'POST', + method: 'GET', headers: { Authorization: `Bearer ${token}`, - 'Content-Type': 'application/json', }, - body: JSON.stringify({}), }); expect(res.status).toBe(404); }); diff --git a/router-tests/operations/pql_manifest_test.go b/router-tests/operations/pql_manifest_test.go index da4c463231..4256a8a009 100644 --- a/router-tests/operations/pql_manifest_test.go +++ b/router-tests/operations/pql_manifest_test.go @@ 
-448,13 +448,6 @@ func TestPQLManifest(t *testing.T) { cdnServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if strings.HasSuffix(r.URL.Path, "/operations/manifest.json") { - // Read the request body to check revision - body, _ := io.ReadAll(r.Body) - var reqBody struct { - Revision string `json:"revision"` - } - _ = json.Unmarshal(body, &reqBody) - manifest := currentManifest.Load().([]byte) // Parse manifest to get its revision @@ -463,12 +456,16 @@ func TestPQLManifest(t *testing.T) { } _ = json.Unmarshal(manifest, &m) - if reqBody.Revision == m.Revision { + // Check If-None-Match header for ETag-based conditional request + ifNoneMatch := r.Header.Get("If-None-Match") + if ifNoneMatch == `"`+m.Revision+`"` { + w.Header().Set("ETag", ifNoneMatch) w.WriteHeader(http.StatusNotModified) return } w.Header().Set("Content-Type", "application/json") + w.Header().Set("ETag", `"`+m.Revision+`"`) w.WriteHeader(http.StatusOK) _, _ = w.Write(manifest) return diff --git a/router/internal/persistedoperation/pqlmanifest/fetcher.go b/router/internal/persistedoperation/pqlmanifest/fetcher.go index af745665cc..956d08ca80 100644 --- a/router/internal/persistedoperation/pqlmanifest/fetcher.go +++ b/router/internal/persistedoperation/pqlmanifest/fetcher.go @@ -1,7 +1,6 @@ package pqlmanifest import ( - "bytes" "compress/gzip" "context" "encoding/json" @@ -16,10 +15,6 @@ import ( "go.uber.org/zap" ) -type manifestRequestBody struct { - Revision string `json:"revision,omitempty"` -} - type Fetcher struct { cdnURL *url.URL authenticationToken string @@ -65,28 +60,23 @@ func NewFetcher(endpoint, token string, logger *zap.Logger) (*Fetcher, error) { }, nil } -// Fetch downloads the manifest from the CDN. It POSTs to /{orgId}/{fedGraphId}/operations/manifest.json -// with Bearer auth, sending the current revision in the request body. The CDN returns 304 Not Modified -// when the revision matches, avoiding a full download. 
Returns (manifest, changed, err). +// Fetch downloads the manifest from the CDN. It GETs /{orgId}/{fedGraphId}/operations/manifest.json +// with Bearer auth, using If-None-Match for conditional requests. The CDN returns 304 Not Modified +// when the ETag matches, avoiding a full download. Returns (manifest, changed, err). func (f *Fetcher) Fetch(ctx context.Context, currentRevision string) (*Manifest, bool, error) { manifestPath := fmt.Sprintf("/%s/%s/operations/manifest.json", f.organizationID, f.federatedGraphID) manifestURL := f.cdnURL.ResolveReference(&url.URL{Path: manifestPath}) - reqBody, err := json.Marshal(manifestRequestBody{ - Revision: currentRevision, - }) - if err != nil { - return nil, false, fmt.Errorf("could not marshal request body: %w", err) - } - - req, err := http.NewRequestWithContext(ctx, "POST", manifestURL.String(), bytes.NewReader(reqBody)) + req, err := http.NewRequestWithContext(ctx, "GET", manifestURL.String(), nil) if err != nil { return nil, false, err } - req.Header.Set("Content-Type", "application/json; charset=UTF-8") - req.Header.Add("Authorization", "Bearer "+f.authenticationToken) + req.Header.Set("Authorization", "Bearer "+f.authenticationToken) req.Header.Set("Accept-Encoding", "gzip") + if currentRevision != "" { + req.Header.Set("If-None-Match", `"`+currentRevision+`"`) + } resp, err := f.httpClient.Do(req) if err != nil { diff --git a/router/internal/persistedoperation/pqlmanifest/fetcher_test.go b/router/internal/persistedoperation/pqlmanifest/fetcher_test.go new file mode 100644 index 0000000000..e0bec69d98 --- /dev/null +++ b/router/internal/persistedoperation/pqlmanifest/fetcher_test.go @@ -0,0 +1,204 @@ +package pqlmanifest + +import ( + "context" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "net/url" + "testing" + + "github.com/stretchr/testify/require" + "go.uber.org/zap" +) + +func newTestFetcher(serverURL string) *Fetcher { + u, _ := url.Parse(serverURL) + return &Fetcher{ + cdnURL: u, + 
authenticationToken: "test-token", + federatedGraphID: "graph-id", + organizationID: "org-id", + httpClient: &http.Client{}, + logger: zap.NewNop(), + } +} + +// mustMarshalManifest marshals a Manifest to JSON, panicking on error. +func mustMarshalManifest(m *Manifest) []byte { + data, err := json.Marshal(m) + if err != nil { + panic(err) + } + return data +} + +// newETagCDNHandler returns an http.Handler that serves a manifest with ETag support. +// It returns 304 when If-None-Match matches the manifest's revision. +func newETagCDNHandler(m *Manifest) http.Handler { + data := mustMarshalManifest(m) + etag := `"` + m.Revision + `"` + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Header.Get("If-None-Match") == etag { + w.Header().Set("ETag", etag) + w.WriteHeader(http.StatusNotModified) + return + } + w.Header().Set("Content-Type", "application/json") + w.Header().Set("ETag", etag) + w.Write(data) + }) +} + +func TestFetch_SendsIfNoneMatchHeader(t *testing.T) { + var receivedHeaders http.Header + var receivedMethod string + var receivedBody []byte + + m := &Manifest{ + Version: 1, + Revision: "rev-123", + GeneratedAt: "2025-01-01T00:00:00Z", + Operations: map[string]string{"hash1": "query { a }"}, + } + data := mustMarshalManifest(m) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + receivedHeaders = r.Header + receivedMethod = r.Method + receivedBody, _ = io.ReadAll(r.Body) + w.Header().Set("Content-Type", "application/json") + w.Header().Set("ETag", `"rev-123"`) + w.Write(data) + })) + defer server.Close() + + f := newTestFetcher(server.URL) + result, changed, err := f.Fetch(context.Background(), "rev-123") + + require.NoError(t, err) + require.True(t, changed) + require.NotNil(t, result) + require.Equal(t, m.Revision, result.Revision) + require.Equal(t, `"rev-123"`, receivedHeaders.Get("If-None-Match")) + require.Equal(t, "GET", receivedMethod) + require.Empty(t, receivedBody, "GET 
request should have no body") +} + +func TestFetch_NoIfNoneMatchOnFirstRequest(t *testing.T) { + var receivedHeaders http.Header + + m := &Manifest{ + Version: 1, + Revision: "rev-1", + GeneratedAt: "2025-01-01T00:00:00Z", + Operations: map[string]string{"hash1": "query { a }"}, + } + + server := httptest.NewServer(newETagCDNHandler(m)) + defer server.Close() + + f := newTestFetcher(server.URL) + + // Wrap to capture headers + var origHandler http.Handler + origHandler = server.Config.Handler + server.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + receivedHeaders = r.Header + origHandler.ServeHTTP(w, r) + }) + + result, changed, err := f.Fetch(context.Background(), "") + + require.NoError(t, err) + require.True(t, changed) + require.NotNil(t, result) + require.Equal(t, "", receivedHeaders.Get("If-None-Match")) +} + +func TestFetch_Handles304Response(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotModified) + })) + defer server.Close() + + f := newTestFetcher(server.URL) + result, changed, err := f.Fetch(context.Background(), "rev-123") + + require.NoError(t, err) + require.False(t, changed) + require.Nil(t, result) +} + +func TestFetch_Handles200WithManifest(t *testing.T) { + m := &Manifest{ + Version: 1, + Revision: "rev-456", + GeneratedAt: "2025-01-01T00:00:00Z", + Operations: map[string]string{"hash1": "query { hello }"}, + } + + server := httptest.NewServer(newETagCDNHandler(m)) + defer server.Close() + + f := newTestFetcher(server.URL) + result, changed, err := f.Fetch(context.Background(), "rev-123") + + require.NoError(t, err) + require.True(t, changed) + require.NotNil(t, result) + require.Equal(t, m.Revision, result.Revision) + require.Equal(t, m.Operations["hash1"], result.Operations["hash1"]) +} + +func TestFetch_ETagRoundTrip(t *testing.T) { + m := &Manifest{ + Version: 1, + Revision: "rev-rt", + GeneratedAt: 
"2025-01-01T00:00:00Z", + Operations: map[string]string{"h1": "query { a }"}, + } + + server := httptest.NewServer(newETagCDNHandler(m)) + defer server.Close() + + f := newTestFetcher(server.URL) + + // First fetch: no revision, should get full manifest + result, changed, err := f.Fetch(context.Background(), "") + require.NoError(t, err) + require.True(t, changed) + require.NotNil(t, result) + require.Equal(t, m.Revision, result.Revision) + + // Second fetch: send revision back, should get 304 + result2, changed2, err2 := f.Fetch(context.Background(), result.Revision) + require.NoError(t, err2) + require.False(t, changed2) + require.Nil(t, result2) +} + +func TestFetch_UsesGETMethod(t *testing.T) { + var receivedMethod string + + m := &Manifest{ + Version: 1, + Revision: "rev-1", + Operations: map[string]string{}, + } + data := mustMarshalManifest(m) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + receivedMethod = r.Method + w.Header().Set("Content-Type", "application/json") + w.Write(data) + })) + defer server.Close() + + f := newTestFetcher(server.URL) + _, _, err := f.Fetch(context.Background(), "") + + require.NoError(t, err) + require.Equal(t, "GET", receivedMethod) +} diff --git a/router/internal/persistedoperation/pqlmanifest/poller_test.go b/router/internal/persistedoperation/pqlmanifest/poller_test.go new file mode 100644 index 0000000000..f71bf2f995 --- /dev/null +++ b/router/internal/persistedoperation/pqlmanifest/poller_test.go @@ -0,0 +1,183 @@ +package pqlmanifest + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.uber.org/zap" +) + +func TestPoller_FetchInitial(t *testing.T) { + m := &Manifest{ + Version: 1, + Revision: "rev-1", + GeneratedAt: "2025-01-01T00:00:00Z", + Operations: map[string]string{"h1": "query { a }"}, + } + + server := httptest.NewServer(newETagCDNHandler(m)) + defer 
server.Close() + + f := newTestFetcher(server.URL) + store := NewStore(zap.NewNop()) + poller := NewPoller(f, store, 10*time.Second, 1*time.Second, zap.NewNop()) + + err := poller.FetchInitial(context.Background()) + require.NoError(t, err) + require.True(t, store.IsLoaded()) + require.Equal(t, m.Revision, store.Revision()) + require.Equal(t, len(m.Operations), store.OperationCount()) +} + +func TestPoller_FetchInitialError(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + })) + defer server.Close() + + f := newTestFetcher(server.URL) + store := NewStore(zap.NewNop()) + poller := NewPoller(f, store, 10*time.Second, 1*time.Second, zap.NewNop()) + + err := poller.FetchInitial(context.Background()) + require.Error(t, err) + require.False(t, store.IsLoaded()) +} + +func TestPoller_PollUpdatesManifest(t *testing.T) { + manifestV1 := &Manifest{ + Version: 1, + Revision: "rev-1", + GeneratedAt: "2025-01-01T00:00:00Z", + Operations: map[string]string{"h1": "query { a }"}, + } + manifestV2 := &Manifest{ + Version: 1, + Revision: "rev-2", + GeneratedAt: "2025-01-02T00:00:00Z", + Operations: map[string]string{"h1": "query { a }", "h2": "query { b }"}, + } + + var currentManifest atomic.Value + currentManifest.Store(manifestV1) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + m := currentManifest.Load().(*Manifest) + etag := `"` + m.Revision + `"` + if r.Header.Get("If-None-Match") == etag { + w.Header().Set("ETag", etag) + w.WriteHeader(http.StatusNotModified) + return + } + w.Header().Set("Content-Type", "application/json") + w.Header().Set("ETag", etag) + data, _ := json.Marshal(m) + w.Write(data) + })) + defer server.Close() + + f := newTestFetcher(server.URL) + store := NewStore(zap.NewNop()) + poller := NewPoller(f, store, 50*time.Millisecond, 1*time.Millisecond, zap.NewNop()) + + // Initial fetch + err := 
poller.FetchInitial(context.Background()) + require.NoError(t, err) + require.Equal(t, manifestV1.Revision, store.Revision()) + require.Equal(t, len(manifestV1.Operations), store.OperationCount()) + + // Start polling + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go poller.Poll(ctx) + + // Wait a few poll cycles — manifest should stay at rev-1 (304s) + time.Sleep(150 * time.Millisecond) + require.Equal(t, manifestV1.Revision, store.Revision()) + + // Update server to serve rev-2 + currentManifest.Store(manifestV2) + + // Wait for poller to pick up the change + require.Eventually(t, func() bool { + return store.Revision() == manifestV2.Revision + }, 2*time.Second, 10*time.Millisecond) + + require.Equal(t, len(manifestV2.Operations), store.OperationCount()) +} + +func TestPoller_PollStopsOnContextCancel(t *testing.T) { + var fetchCount atomic.Int32 + + m := &Manifest{ + Version: 1, + Revision: "rev-1", + GeneratedAt: "2025-01-01T00:00:00Z", + Operations: map[string]string{}, + } + data := mustMarshalManifest(m) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fetchCount.Add(1) + w.Header().Set("Content-Type", "application/json") + w.Write(data) + })) + defer server.Close() + + f := newTestFetcher(server.URL) + store := NewStore(zap.NewNop()) + poller := NewPoller(f, store, 50*time.Millisecond, 1*time.Millisecond, zap.NewNop()) + + ctx, cancel := context.WithCancel(context.Background()) + go poller.Poll(ctx) + + // Let it poll a few times + time.Sleep(200 * time.Millisecond) + cancel() + + countAtCancel := fetchCount.Load() + // Wait and verify no more fetches happen + time.Sleep(200 * time.Millisecond) + require.Equal(t, countAtCancel, fetchCount.Load(), "poller should stop fetching after context cancel") +} + +func TestPoller_PollContinuesOnFetchError(t *testing.T) { + var requestCount atomic.Int32 + + m := &Manifest{ + Version: 1, + Revision: "rev-1", + GeneratedAt: 
"2025-01-01T00:00:00Z", + Operations: map[string]string{"h1": "query { a }"}, + } + data := mustMarshalManifest(m) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + count := requestCount.Add(1) + if count <= 2 { + w.WriteHeader(http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "application/json") + w.Write(data) + })) + defer server.Close() + + f := newTestFetcher(server.URL) + store := NewStore(zap.NewNop()) + poller := NewPoller(f, store, 50*time.Millisecond, 1*time.Millisecond, zap.NewNop()) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go poller.Poll(ctx) + + require.Eventually(t, func() bool { + return store.IsLoaded() && store.Revision() == m.Revision + }, 5*time.Second, 10*time.Millisecond) +} From a155ccf7cb57b1d3fa5c0b0d577b96c9cee59810 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Thu, 26 Mar 2026 16:51:04 +0100 Subject: [PATCH 25/31] refactor(tests): simplify handler assignment in fetcher tests --- router/internal/persistedoperation/pqlmanifest/fetcher_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/router/internal/persistedoperation/pqlmanifest/fetcher_test.go b/router/internal/persistedoperation/pqlmanifest/fetcher_test.go index e0bec69d98..da486b7407 100644 --- a/router/internal/persistedoperation/pqlmanifest/fetcher_test.go +++ b/router/internal/persistedoperation/pqlmanifest/fetcher_test.go @@ -102,8 +102,7 @@ func TestFetch_NoIfNoneMatchOnFirstRequest(t *testing.T) { f := newTestFetcher(server.URL) // Wrap to capture headers - var origHandler http.Handler - origHandler = server.Config.Handler + var origHandler http.Handler = server.Config.Handler server.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { receivedHeaders = r.Header origHandler.ServeHTTP(w, r) From d9aa7de00ef8d6e59d96698fa06433dba63fccc2 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Thu, 26 Mar 2026 17:13:15 +0100 
Subject: [PATCH 26/31] fix(cdn): change HTTP method to GET for fetching manifest --- .../internal/persistedoperation/operationstorage/cdn/client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/router/internal/persistedoperation/operationstorage/cdn/client.go b/router/internal/persistedoperation/operationstorage/cdn/client.go index d422ab89af..11a9c4659f 100644 --- a/router/internal/persistedoperation/operationstorage/cdn/client.go +++ b/router/internal/persistedoperation/operationstorage/cdn/client.go @@ -159,7 +159,7 @@ func (cdn *Client) persistedOperation(ctx context.Context, clientName string, sh func (cdn *Client) FetchManifest(ctx context.Context, manifestPath string) ([]byte, error) { manifestURL := cdn.cdnURL.ResolveReference(&url.URL{Path: manifestPath}) - req, err := http.NewRequestWithContext(ctx, "POST", manifestURL.String(), nil) + req, err := http.NewRequestWithContext(ctx, "GET", manifestURL.String(), nil) if err != nil { return nil, err } From 8af64f05eb2ff308984a444378ad1839285cd4a9 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Thu, 26 Mar 2026 17:42:38 +0100 Subject: [PATCH 27/31] feat(pql): implement ReadManifest method for fetching PQL manifests from CDN and filesystem --- router-tests/operations/pql_manifest_test.go | 6 +- router/core/router.go | 21 ++++--- router/demo.config.yaml | 4 ++ router/internal/persistedoperation/client.go | 2 +- .../operationstorage/cdn/client.go | 58 +++++++++---------- .../operationstorage/fs/client.go | 7 +-- .../operationstorage/s3/client.go | 12 +++- .../persistedoperation/pqlmanifest/fetcher.go | 7 +++ .../pqlmanifest/fetcher_test.go | 1 + .../persistedoperation/pqlmanifest/poller.go | 13 ++--- .../pqlmanifest/poller_test.go | 22 +++---- .../persistedoperation/pqlmanifest/store.go | 19 ++++-- 12 files changed, 94 insertions(+), 78 deletions(-) diff --git a/router-tests/operations/pql_manifest_test.go b/router-tests/operations/pql_manifest_test.go index 4256a8a009..27770384c2 100644 --- 
a/router-tests/operations/pql_manifest_test.go +++ b/router-tests/operations/pql_manifest_test.go @@ -588,10 +588,10 @@ func TestPQLManifest(t *testing.T) { cdnServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if strings.HasSuffix(r.URL.Path, "/operations/manifest.json") { - w.WriteHeader(http.StatusInternalServerError) + // Return 404 (not 500) to avoid retryablehttp's 5 retries with exponential backoff. + w.WriteHeader(http.StatusNotFound) return } - // Serve other CDN requests normally w.WriteHeader(http.StatusNotFound) })) defer cdnServer.Close() @@ -608,7 +608,7 @@ func TestPQLManifest(t *testing.T) { }), }, }, func(t *testing.T, err error) { - require.ErrorContains(t, err, "failed to fetch initial PQL manifest") + require.ErrorContains(t, err, "PQL manifest not found on CDN") }) }) } diff --git a/router/core/router.go b/router/core/router.go index a34fc6c1d3..6ec2dfa316 100644 --- a/router/core/router.go +++ b/router/core/router.go @@ -1260,8 +1260,6 @@ func (r *Router) buildClients(ctx context.Context) error { if r.persistedOperationsConfig.Manifest.Enabled && !r.persistedOperationsConfig.Disabled { const manifestFileName = "manifest.json" - pqlStore = pqlmanifest.NewStore(r.logger) - storageProviderID := r.persistedOperationsConfig.Storage.ProviderID if _, ok := fileSystemProviders[storageProviderID]; ok { @@ -1269,24 +1267,23 @@ func (r *Router) buildClients(ctx context.Context) error { } if storageProviderID != "" { - // An explicit storage provider is configured — use the already-created client to fetch the manifest once at startup. + // An explicit storage provider is configured — read the manifest once at startup. 
objectPrefix := r.persistedOperationsConfig.Storage.ObjectPrefix objectPath := manifestFileName if objectPrefix != "" { objectPath = path.Join(objectPrefix, manifestFileName) } - data, err := pClient.FetchManifest(ctx, objectPath) + manifest, err := pClient.ReadManifest(ctx, objectPath) if err != nil { return fmt.Errorf("failed to fetch PQL manifest from storage provider %q: %w", - r.persistedOperationsConfig.Storage.ProviderID, err) - } - if err := pqlStore.LoadFromData(data); err != nil { - return fmt.Errorf("failed to parse PQL manifest from storage provider %q: %w", - r.persistedOperationsConfig.Storage.ProviderID, err) + storageProviderID, err) } + + pqlStore = pqlmanifest.NewStore(r.logger) + pqlStore.Load(manifest) r.logger.Info("Loaded PQL manifest from storage provider", - zap.String("provider_id", r.persistedOperationsConfig.Storage.ProviderID), + zap.String("provider_id", storageProviderID), zap.Int("operations", pqlStore.OperationCount()), ) } else { @@ -1301,7 +1298,7 @@ func (r *Router) buildClients(ctx context.Context) error { } poller := pqlmanifest.NewPoller( - fetcher, pqlStore, + fetcher, r.persistedOperationsConfig.Manifest.PollInterval, r.persistedOperationsConfig.Manifest.PollJitter, r.logger, @@ -1312,6 +1309,8 @@ func (r *Router) buildClients(ctx context.Context) error { } go poller.Poll(ctx) + + pqlStore = fetcher.Store() } // Manifest is authoritative — individual operation fetches are not needed. 
diff --git a/router/demo.config.yaml b/router/demo.config.yaml index ccea543c6d..be39ac3572 100644 --- a/router/demo.config.yaml +++ b/router/demo.config.yaml @@ -5,6 +5,10 @@ version: "1" +persisted_operations: + manifest: + enabled: true + events: providers: nats: diff --git a/router/internal/persistedoperation/client.go b/router/internal/persistedoperation/client.go index 29c05dc1d0..48f48bc9bd 100644 --- a/router/internal/persistedoperation/client.go +++ b/router/internal/persistedoperation/client.go @@ -27,7 +27,7 @@ func (e PersistentOperationNotFoundError) Error() string { type StorageClient interface { PersistedOperation(ctx context.Context, clientName string, sha256Hash string) ([]byte, error) - FetchManifest(ctx context.Context, objectPath string) ([]byte, error) + ReadManifest(ctx context.Context, objectPath string) (*pqlmanifest.Manifest, error) Close() } diff --git a/router/internal/persistedoperation/operationstorage/cdn/client.go b/router/internal/persistedoperation/operationstorage/cdn/client.go index 11a9c4659f..02a7d6906d 100644 --- a/router/internal/persistedoperation/operationstorage/cdn/client.go +++ b/router/internal/persistedoperation/operationstorage/cdn/client.go @@ -13,6 +13,7 @@ import ( "github.com/wundergraph/cosmo/router/internal/httpclient" "github.com/wundergraph/cosmo/router/internal/jwt" "github.com/wundergraph/cosmo/router/internal/persistedoperation" + "github.com/wundergraph/cosmo/router/internal/persistedoperation/pqlmanifest" "go.opentelemetry.io/otel/codes" semconv12 "go.opentelemetry.io/otel/semconv/v1.12.0" semconv "go.opentelemetry.io/otel/semconv/v1.17.0" @@ -40,6 +41,7 @@ type Client struct { organizationID string httpClient *http.Client logger *zap.Logger + fetcher *pqlmanifest.Fetcher } // NewClient creates a new CDN Client. URL is the URL of the CDN. 
@@ -64,6 +66,11 @@ func NewClient(endpoint string, token string, opts Options) (*Client, error) { zap.String("url", endpoint), ) + fetcher, err := pqlmanifest.NewFetcher(endpoint, token, logger) + if err != nil { + return nil, fmt.Errorf("failed to create manifest fetcher: %w", err) + } + return &Client{ cdnURL: u, authenticationToken: token, @@ -71,6 +78,7 @@ func NewClient(endpoint string, token string, opts Options) (*Client, error) { organizationID: url.PathEscape(claims.OrganizationID), httpClient: httpclient.NewRetryableHTTPClient(logger), logger: logger, + fetcher: fetcher, }, nil } @@ -155,38 +163,6 @@ func (cdn *Client) persistedOperation(ctx context.Context, clientName string, sh return []byte(po.Body), nil } -// FetchManifest fetches a PQL manifest from the CDN at the given path and returns the raw bytes. -func (cdn *Client) FetchManifest(ctx context.Context, manifestPath string) ([]byte, error) { - manifestURL := cdn.cdnURL.ResolveReference(&url.URL{Path: manifestPath}) - - req, err := http.NewRequestWithContext(ctx, "GET", manifestURL.String(), nil) - if err != nil { - return nil, err - } - - cdn.setCDNHeaders(req) - - resp, err := cdn.httpClient.Do(req) - if err != nil { - return nil, err - } - defer func() { - _ = resp.Body.Close() - }() - - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("CDN returned status %d when fetching persistent operation manifest", resp.StatusCode) - } - - reader, cleanup, err := gzipAwareReader(resp) - if err != nil { - return nil, err - } - defer cleanup() - - return io.ReadAll(reader) -} - // setCDNHeaders sets the common headers for CDN requests. func (cdn *Client) setCDNHeaders(req *http.Request) { req.Header.Set("Content-Type", "application/json; charset=UTF-8") @@ -207,4 +183,22 @@ func gzipAwareReader(resp *http.Response) (io.Reader, func(), error) { return resp.Body, func() {}, nil } +// ReadManifest fetches the PQL manifest from the CDN, delegating to the manifest Fetcher. 
+// The objectPath parameter is unused — the Fetcher constructs the path from JWT claims. +func (cdn *Client) ReadManifest(ctx context.Context, _ string) (*pqlmanifest.Manifest, error) { + manifest, _, err := cdn.fetcher.Fetch(ctx, "") + if err != nil { + return nil, err + } + if manifest == nil { + return nil, fmt.Errorf("no manifest returned from CDN") + } + return manifest, nil +} + +// Fetcher returns the manifest fetcher for use with polling. +func (cdn *Client) Fetcher() *pqlmanifest.Fetcher { + return cdn.fetcher +} + func (cdn *Client) Close() {} diff --git a/router/internal/persistedoperation/operationstorage/fs/client.go b/router/internal/persistedoperation/operationstorage/fs/client.go index a99dfef916..d808c11c33 100644 --- a/router/internal/persistedoperation/operationstorage/fs/client.go +++ b/router/internal/persistedoperation/operationstorage/fs/client.go @@ -8,6 +8,7 @@ import ( "path/filepath" "github.com/wundergraph/cosmo/router/internal/persistedoperation" + "github.com/wundergraph/cosmo/router/internal/persistedoperation/pqlmanifest" ) type client struct { @@ -69,10 +70,8 @@ func (c client) persistedOperation(clientName string, sha256Hash string) ([]byte return []byte(po.Body), nil } -// FetchManifest reads a PQL manifest from the filesystem at the given path and returns the raw bytes. 
-func (c client) FetchManifest(_ context.Context, manifestPath string) ([]byte, error) { - fullPath := filepath.Join(c.path, c.options.ObjectPathPrefix, manifestPath) - return os.ReadFile(fullPath) +func (c client) ReadManifest(_ context.Context, _ string) (*pqlmanifest.Manifest, error) { + return nil, fmt.Errorf("filesystem storage provider does not support reading manifests; use S3 or CDN instead") } func (c client) Close() {} diff --git a/router/internal/persistedoperation/operationstorage/s3/client.go b/router/internal/persistedoperation/operationstorage/s3/client.go index c67e408566..ebdbccc6ee 100644 --- a/router/internal/persistedoperation/operationstorage/s3/client.go +++ b/router/internal/persistedoperation/operationstorage/s3/client.go @@ -10,6 +10,7 @@ import ( "github.com/minio/minio-go/v7" "github.com/minio/minio-go/v7/pkg/credentials" "github.com/wundergraph/cosmo/router/internal/persistedoperation" + "github.com/wundergraph/cosmo/router/internal/persistedoperation/pqlmanifest" sdktrace "go.opentelemetry.io/otel/sdk/trace" "go.opentelemetry.io/otel/trace" ) @@ -108,8 +109,8 @@ func (c Client) persistedOperation(ctx context.Context, clientName, sha256Hash s return []byte(po.Body), nil } -// FetchManifest fetches a PQL manifest from S3 at the given object path and returns the raw bytes. -func (c Client) FetchManifest(ctx context.Context, objectPath string) ([]byte, error) { +// ReadManifest fetches and parses a PQL manifest from S3 at the given object path. 
+func (c Client) ReadManifest(ctx context.Context, objectPath string) (*pqlmanifest.Manifest, error) { reader, err := c.client.GetObject(ctx, c.options.BucketName, objectPath, minio.GetObjectOptions{}) if err != nil { return nil, fmt.Errorf("failed to get manifest from S3: %w", err) @@ -118,7 +119,12 @@ func (c Client) FetchManifest(ctx context.Context, objectPath string) ([]byte, e _ = reader.Close() }() - return io.ReadAll(reader) + data, err := io.ReadAll(reader) + if err != nil { + return nil, fmt.Errorf("failed to read manifest from S3: %w", err) + } + + return pqlmanifest.ParseManifest(data) } func (c Client) Close() {} diff --git a/router/internal/persistedoperation/pqlmanifest/fetcher.go b/router/internal/persistedoperation/pqlmanifest/fetcher.go index 956d08ca80..23937301fe 100644 --- a/router/internal/persistedoperation/pqlmanifest/fetcher.go +++ b/router/internal/persistedoperation/pqlmanifest/fetcher.go @@ -26,6 +26,7 @@ type Fetcher struct { organizationID string httpClient *http.Client logger *zap.Logger + store *Store } // NewFetcher creates a new manifest fetcher. It reuses JWT extraction and HTTP client @@ -57,9 +58,15 @@ func NewFetcher(endpoint, token string, logger *zap.Logger) (*Fetcher, error) { organizationID: url.PathEscape(claims.OrganizationID), httpClient: httpclient.NewRetryableHTTPClient(logger), logger: logger, + store: NewStore(logger), }, nil } +// Store returns the underlying manifest store for read access (lookups, revision). +func (f *Fetcher) Store() *Store { + return f.store +} + // Fetch downloads the manifest from the CDN. It GETs /{orgId}/{fedGraphId}/operations/manifest.json // with Bearer auth, using If-None-Match for conditional requests. The CDN returns 304 Not Modified // when the ETag matches, avoiding a full download. Returns (manifest, changed, err). 
diff --git a/router/internal/persistedoperation/pqlmanifest/fetcher_test.go b/router/internal/persistedoperation/pqlmanifest/fetcher_test.go index da486b7407..5469ffb307 100644 --- a/router/internal/persistedoperation/pqlmanifest/fetcher_test.go +++ b/router/internal/persistedoperation/pqlmanifest/fetcher_test.go @@ -22,6 +22,7 @@ func newTestFetcher(serverURL string) *Fetcher { organizationID: "org-id", httpClient: &http.Client{}, logger: zap.NewNop(), + store: NewStore(zap.NewNop()), } } diff --git a/router/internal/persistedoperation/pqlmanifest/poller.go b/router/internal/persistedoperation/pqlmanifest/poller.go index c84ca3e259..b85be2ae6f 100644 --- a/router/internal/persistedoperation/pqlmanifest/poller.go +++ b/router/internal/persistedoperation/pqlmanifest/poller.go @@ -10,13 +10,12 @@ import ( type Poller struct { fetcher *Fetcher - store *Store pollInterval time.Duration pollJitter time.Duration logger *zap.Logger } -func NewPoller(fetcher *Fetcher, store *Store, pollInterval, pollJitter time.Duration, logger *zap.Logger) *Poller { +func NewPoller(fetcher *Fetcher, pollInterval, pollJitter time.Duration, logger *zap.Logger) *Poller { if pollJitter <= 0 { pollJitter = 5 * time.Second } @@ -28,7 +27,6 @@ func NewPoller(fetcher *Fetcher, store *Store, pollInterval, pollJitter time.Dur } return &Poller{ fetcher: fetcher, - store: store, pollInterval: pollInterval, pollJitter: pollJitter, logger: logger, @@ -43,7 +41,7 @@ func (p *Poller) FetchInitial(ctx context.Context) error { } if changed && manifest != nil { - p.store.Load(manifest) + p.fetcher.Store().Load(manifest) p.logger.Info("Loaded initial PQL manifest", zap.String("revision", manifest.Revision), zap.Int("operation_count", len(manifest.Operations)), @@ -54,9 +52,10 @@ func (p *Poller) FetchInitial(ctx context.Context) error { } // Poll runs a background goroutine loop that periodically fetches the manifest. -// It sleeps for pollInterval + random jitter, fetches, and if changed calls store.Load(). 
+// It sleeps for pollInterval + random jitter, fetches, and if changed updates the store. // It exits when ctx is cancelled. func (p *Poller) Poll(ctx context.Context) { + store := p.fetcher.Store() for { jitter := time.Duration(rand.Int63n(int64(p.pollJitter + 1))) sleepDuration := p.pollInterval + jitter @@ -67,7 +66,7 @@ func (p *Poller) Poll(ctx context.Context) { case <-time.After(sleepDuration): } - currentRevision := p.store.Revision() + currentRevision := store.Revision() manifest, changed, err := p.fetcher.Fetch(ctx, currentRevision) if err != nil { p.logger.Warn("Failed to fetch PQL manifest", zap.Error(err)) @@ -75,7 +74,7 @@ func (p *Poller) Poll(ctx context.Context) { } if changed && manifest != nil { - p.store.Load(manifest) + store.Load(manifest) p.logger.Debug("Updated PQL manifest", zap.String("revision", manifest.Revision), zap.String("previous_revision", currentRevision), diff --git a/router/internal/persistedoperation/pqlmanifest/poller_test.go b/router/internal/persistedoperation/pqlmanifest/poller_test.go index f71bf2f995..61a85f4366 100644 --- a/router/internal/persistedoperation/pqlmanifest/poller_test.go +++ b/router/internal/persistedoperation/pqlmanifest/poller_test.go @@ -25,11 +25,12 @@ func TestPoller_FetchInitial(t *testing.T) { defer server.Close() f := newTestFetcher(server.URL) - store := NewStore(zap.NewNop()) - poller := NewPoller(f, store, 10*time.Second, 1*time.Second, zap.NewNop()) + poller := NewPoller(f, 10*time.Second, 1*time.Second, zap.NewNop()) err := poller.FetchInitial(context.Background()) require.NoError(t, err) + + store := f.Store() require.True(t, store.IsLoaded()) require.Equal(t, m.Revision, store.Revision()) require.Equal(t, len(m.Operations), store.OperationCount()) @@ -42,12 +43,11 @@ func TestPoller_FetchInitialError(t *testing.T) { defer server.Close() f := newTestFetcher(server.URL) - store := NewStore(zap.NewNop()) - poller := NewPoller(f, store, 10*time.Second, 1*time.Second, zap.NewNop()) + poller := 
NewPoller(f, 10*time.Second, 1*time.Second, zap.NewNop()) err := poller.FetchInitial(context.Background()) require.Error(t, err) - require.False(t, store.IsLoaded()) + require.False(t, f.Store().IsLoaded()) } func TestPoller_PollUpdatesManifest(t *testing.T) { @@ -83,12 +83,13 @@ func TestPoller_PollUpdatesManifest(t *testing.T) { defer server.Close() f := newTestFetcher(server.URL) - store := NewStore(zap.NewNop()) - poller := NewPoller(f, store, 50*time.Millisecond, 1*time.Millisecond, zap.NewNop()) + poller := NewPoller(f, 50*time.Millisecond, 1*time.Millisecond, zap.NewNop()) // Initial fetch err := poller.FetchInitial(context.Background()) require.NoError(t, err) + + store := f.Store() require.Equal(t, manifestV1.Revision, store.Revision()) require.Equal(t, len(manifestV1.Operations), store.OperationCount()) @@ -131,8 +132,7 @@ func TestPoller_PollStopsOnContextCancel(t *testing.T) { defer server.Close() f := newTestFetcher(server.URL) - store := NewStore(zap.NewNop()) - poller := NewPoller(f, store, 50*time.Millisecond, 1*time.Millisecond, zap.NewNop()) + poller := NewPoller(f, 50*time.Millisecond, 1*time.Millisecond, zap.NewNop()) ctx, cancel := context.WithCancel(context.Background()) go poller.Poll(ctx) @@ -170,13 +170,13 @@ func TestPoller_PollContinuesOnFetchError(t *testing.T) { defer server.Close() f := newTestFetcher(server.URL) - store := NewStore(zap.NewNop()) - poller := NewPoller(f, store, 50*time.Millisecond, 1*time.Millisecond, zap.NewNop()) + poller := NewPoller(f, 50*time.Millisecond, 1*time.Millisecond, zap.NewNop()) ctx, cancel := context.WithCancel(context.Background()) defer cancel() go poller.Poll(ctx) + store := f.Store() require.Eventually(t, func() bool { return store.IsLoaded() && store.Revision() == m.Revision }, 5*time.Second, 10*time.Millisecond) diff --git a/router/internal/persistedoperation/pqlmanifest/store.go b/router/internal/persistedoperation/pqlmanifest/store.go index 81912754ab..ab3577afa3 100644 --- 
a/router/internal/persistedoperation/pqlmanifest/store.go +++ b/router/internal/persistedoperation/pqlmanifest/store.go @@ -57,18 +57,25 @@ func (s *Store) LoadFromFile(path string) error { return s.LoadFromData(data) } -// LoadFromData parses and validates manifest JSON data and loads it into the store. -func (s *Store) LoadFromData(data []byte) error { +// ParseManifest parses and validates manifest JSON data. +func ParseManifest(data []byte) (*Manifest, error) { var manifest Manifest if err := json.Unmarshal(data, &manifest); err != nil { - return fmt.Errorf("failed to parse manifest: %w", err) + return nil, fmt.Errorf("failed to parse manifest: %w", err) } - if err := validateManifest(&manifest); err != nil { - return fmt.Errorf("invalid manifest: %w", err) + return nil, fmt.Errorf("invalid manifest: %w", err) } + return &manifest, nil +} - s.Load(&manifest) +// LoadFromData parses and validates manifest JSON data and loads it into the store. +func (s *Store) LoadFromData(data []byte) error { + manifest, err := ParseManifest(data) + if err != nil { + return err + } + s.Load(manifest) return nil } From 9db9c6b0bee69dc9272e75714b4fc0a3d052ac39 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Thu, 26 Mar 2026 17:46:28 +0100 Subject: [PATCH 28/31] chore(config): remove unused persisted_operations section from demo.config.yaml --- router/demo.config.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/router/demo.config.yaml b/router/demo.config.yaml index be39ac3572..ccea543c6d 100644 --- a/router/demo.config.yaml +++ b/router/demo.config.yaml @@ -5,10 +5,6 @@ version: "1" -persisted_operations: - manifest: - enabled: true - events: providers: nats: From 062a4ee37f3b5baf862e374d63e90071f2299596 Mon Sep 17 00:00:00 2001 From: StarpTech Date: Thu, 26 Mar 2026 17:56:23 +0100 Subject: [PATCH 29/31] docs(push): clarify client-name option usage in push.mdx --- docs-website/cli/operations/push.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/docs-website/cli/operations/push.mdx b/docs-website/cli/operations/push.mdx index ec229d9479..a2009ada48 100644 --- a/docs-website/cli/operations/push.mdx +++ b/docs-website/cli/operations/push.mdx @@ -25,7 +25,7 @@ The operations are validated against the graph schema before registering them. I ### Options - `-n, --namespace` : The namespace of the federated graph (Default: "default"). -- `-c, --client-name ` The client name / identifier to register these operations under. +- `-c, --client-name ` The client name / identifier to register these operations under. By default, the client name must be sent by the client at request time via the `graphql-client-name` HTTP header. When the router runs in [manifest mode](/router/persisted-queries/persisted-operations#pql-manifest), the client name is used only for organizational structure in the Studio (e.g. you could use `"default"`) and is not matched against incoming requests. - `-f, --file ` Filename to read operations from. This argument can be used multiple times to read operations from multiple files. See [Description](/cli/operations/push#description) for a list of the supported file formats. - `-q, --quiet` Don't produce any output, just exit with success or error. Intended for use in CI checks. - `--format ` Specify an output format for the returned data. 
Support types include `text` (the default) and `json.` \ No newline at end of file From 52d41a302e91d29b8fbe3ae3e7575ed47142695a Mon Sep 17 00:00:00 2001 From: JivusAyrus Date: Fri, 27 Mar 2026 11:39:27 +0530 Subject: [PATCH 30/31] feat(docs): enhance tutorial on using persisted operations with PQL manifest details --- .../tutorial/using-persisted-operations.mdx | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/docs-website/tutorial/using-persisted-operations.mdx b/docs-website/tutorial/using-persisted-operations.mdx index 258c1e85ad..ba5f910ecc 100644 --- a/docs-website/tutorial/using-persisted-operations.mdx +++ b/docs-website/tutorial/using-persisted-operations.mdx @@ -84,7 +84,7 @@ There are a few things to note from this command: * The first argument is the federated graph name to push the operations to. This is the federated graph we created while following , named `federation`. -* After the federated subgraph name, we must also indicate a client name. Persisted operations in Cosmo are always associated with a given client. If needed, Cosmo will automatically register the given client name the first time it sees it. During operation execution, the client name is obtained from the `graphql-client-name` HTTP header. +* After the federated graph name, we must also indicate a client name. Persisted operations in Cosmo are always associated with a given client. If needed, Cosmo will automatically register the given client name the first time it sees it. During operation execution, the client name is obtained from the `graphql-client-name` HTTP header. When the router runs in [manifest mode](/router/persisted-queries/persisted-operations#pql-manifest), the client name is used only to structure operations in Studio. * Finally, we specify one or more files that contain GraphQL operations. 
Here we're using a plain `.graphql` file, but other formats are also supported, including: @@ -116,8 +116,26 @@ curl 'http://127.0.0.1:3002/graphql' \ This will return the same data as executing the operation by returning its contents. +## Using the PQL Manifest + +Instead of having the router fetch each persisted operation individually from the CDN on each request, you can enable the **PQL manifest**. In this mode, the router loads all persisted operations from a single `manifest.json` file at startup and serves them. + +Add the following to your router configuration: + +```yaml +persisted_operations: + manifest: + enabled: true +``` + +The manifest is automatically kept in sync with the Cosmo CDN whenever you push or delete operations. The router polls for updates and hot-reloads without a restart. + +You can also load the manifest from a custom [storage provider](/router/storage-providers) (e.g. S3-compatible storage) instead of the Cosmo CDN. For details, see [PQL Manifest](/router/persisted-queries/persisted-operations#pql-manifest). + ## Further information -* Check the router documentation for [Persisted Operations](/router/persisted-queries/persisted-operations). +* [Persisted Operations](/router/persisted-queries/persisted-operations) — full reference including manifest formats, manifest mode, custom storage providers, and security options. + +* [Push command](/cli/operations/push) — CLI reference for `wgc operations push`. -* Check the documentation for [Push](/cli/operations/push) command. +* [Storage Providers](/router/storage-providers) — configure S3 or other storage backends for router artifacts. 
From 93befb6e506678788aade3c592a9c620a5fcc15e Mon Sep 17 00:00:00 2001 From: StarpTech Date: Fri, 27 Mar 2026 13:44:27 +0100 Subject: [PATCH 31/31] feat(manifest): add cache warmup configuration for PQL operations --- .../persisted-operations.mdx | 24 +++ router-tests/operations/pql_manifest_test.go | 183 ++++++++++++++++-- router/core/cache_warmup.go | 6 +- router/core/cache_warmup_manifest.go | 47 +++++ router/core/cache_warmup_manifest_test.go | 86 ++++++++ router/core/graph_server.go | 50 +++++ router/core/operation_processor.go | 14 +- router/internal/persistedoperation/client.go | 15 +- .../pqlmanifest/fetcher_test.go | 6 + .../pqlmanifest/poller_test.go | 5 + .../persistedoperation/pqlmanifest/store.go | 23 ++- .../pqlmanifest/store_test.go | 42 ++++ .../authentication/oidc_discovery_client.go | 2 + router/pkg/config/config.go | 16 +- router/pkg/config/config.schema.json | 33 ++++ router/pkg/config/config_test.go | 5 + .../pkg/config/testdata/config_defaults.json | 8 +- router/pkg/config/testdata/config_full.json | 8 +- 18 files changed, 534 insertions(+), 39 deletions(-) create mode 100644 router/core/cache_warmup_manifest.go create mode 100644 router/core/cache_warmup_manifest_test.go diff --git a/docs-website/router/persisted-queries/persisted-operations.mdx b/docs-website/router/persisted-queries/persisted-operations.mdx index 18c27d449d..7391394b98 100644 --- a/docs-website/router/persisted-queries/persisted-operations.mdx +++ b/docs-website/router/persisted-queries/persisted-operations.mdx @@ -132,6 +132,30 @@ The manifest is automatically updated in the Cosmo CDN whenever operations are a When the manifest is enabled, it is **authoritative** — the router does not fall back to fetching individual operations from the CDN. Unknown operation hashes are rejected immediately. 
+### Cache warmup + +When the PQL manifest is enabled, the router automatically warms up its caches by pre-processing all operations from the manifest at startup and after each manifest update. Each operation goes through parsing, normalization, validation, and query planning — the same steps that happen on a regular request. Once warmed, every operation is served entirely from cache with zero processing overhead on the first request. + +Cache warmup is enabled by default and can be configured or disabled: + +```yaml +persisted_operations: + manifest: + enabled: true + warmup: + enabled: true # default: true + workers: 4 # number of concurrent workers (default: 4) + items_per_second: 50 # rate limit, 0 = unlimited (default: 50) + timeout: 30s # maximum time for warmup to complete (default: 30s) +``` + +| Option | Default | Description | +|--------|---------|-------------| +| `enabled` | `true` | Set to `false` to disable cache warmup for manifest operations. | +| `workers` | `4` | Number of concurrent workers used to pre-process operations. Increase for large manifests. | +| `items_per_second` | `50` | Rate limit for items processed per second. Set to `0` for unlimited throughput. | +| `timeout` | `30s` | Maximum time allowed for the warmup to complete. If the timeout is exceeded, the router logs an error but continues serving requests normally. | + ## Using a custom storage provider You can load persisted operations from your own S3-compatible storage instead of the Cosmo CDN. First, [define a storage provider](/router/storage-providers), then reference it in your persisted operations configuration. 
diff --git a/router-tests/operations/pql_manifest_test.go b/router-tests/operations/pql_manifest_test.go index 27770384c2..cdba64f0c3 100644 --- a/router-tests/operations/pql_manifest_test.go +++ b/router-tests/operations/pql_manifest_test.go @@ -10,7 +10,6 @@ import ( "testing" "time" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/wundergraph/cosmo/router-tests/testenv" "github.com/wundergraph/cosmo/router/core" @@ -49,6 +48,19 @@ func TestPQLManifest(t *testing.T) { }, } + manifestConfigWithWarmup := config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + Warmup: config.PQLManifestWarmupConfig{ + Enabled: true, + Workers: 4, + Timeout: 30 * time.Second, + }, + }, + } + t.Run("lookup succeeds for known operations", func(t *testing.T) { t.Parallel() testenv.Run(t, &testenv.Config{ @@ -420,13 +432,12 @@ func TestPQLManifest(t *testing.T) { }) }) - t.Run("manifest update invalidates normalization cache", func(t *testing.T) { + t.Run("manifest reload preserves cache hits", func(t *testing.T) { t.Parallel() employeesHash := "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f" employeesQuery := "query Employees {\n employees {\n id\n }\n}" - // manifestV1 has the Employees operation manifestV1, _ := json.Marshal(map[string]interface{}{ "version": 1, "revision": "rev-v1", @@ -435,28 +446,30 @@ func TestPQLManifest(t *testing.T) { employeesHash: employeesQuery, }, }) - // manifestV2 removes the Employees operation + // manifestV2 has the same operation but a new revision manifestV2, _ := json.Marshal(map[string]interface{}{ "version": 1, "revision": "rev-v2", "generatedAt": "2024-01-02T00:00:00Z", - "operations": map[string]string{}, + "operations": map[string]string{ + employeesHash: employeesQuery, + }, }) var currentManifest atomic.Value currentManifest.Store(manifestV1) + var manifestFetchCount atomic.Int32 + 
cdnServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if strings.HasSuffix(r.URL.Path, "/operations/manifest.json") { manifest := currentManifest.Load().([]byte) - // Parse manifest to get its revision var m struct { Revision string `json:"revision"` } _ = json.Unmarshal(manifest, &m) - // Check If-None-Match header for ETag-based conditional request ifNoneMatch := r.Header.Get("If-None-Match") if ifNoneMatch == `"`+m.Revision+`"` { w.Header().Set("ETag", ifNoneMatch) @@ -464,6 +477,7 @@ func TestPQLManifest(t *testing.T) { return } + manifestFetchCount.Add(1) w.Header().Set("Content-Type", "application/json") w.Header().Set("ETag", `"`+m.Revision+`"`) w.WriteHeader(http.StatusOK) @@ -471,7 +485,6 @@ func TestPQLManifest(t *testing.T) { return } - // For non-manifest requests, return 404 w.WriteHeader(http.StatusNotFound) })) defer cdnServer.Close() @@ -484,6 +497,11 @@ func TestPQLManifest(t *testing.T) { Enabled: true, PollInterval: 100 * time.Millisecond, PollJitter: 5 * time.Millisecond, + Warmup: config.PQLManifestWarmupConfig{ + Enabled: true, + Workers: 4, + Timeout: 30 * time.Second, + }, }, }), }, @@ -491,7 +509,7 @@ func TestPQLManifest(t *testing.T) { header := make(http.Header) header.Add("graphql-client-name", "my-client") - // 1. Operation succeeds with manifest v1 + // 1. First request is a cache HIT from warmup res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ OperationName: []byte(`"Employees"`), Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "` + employeesHash + `"}}`), @@ -499,8 +517,18 @@ func TestPQLManifest(t *testing.T) { }) require.NoError(t, err) require.Equal(t, expectedEmployeesBody, res.Body) + require.Equal(t, "HIT", res.Response.Header.Get(core.PersistedOperationCacheHeader)) - // 2. Make the same request again to populate the normalization cache + // 2. Swap to manifest v2 (new revision, same operations) + currentManifest.Store(manifestV2) + + // 3. 
Wait for the poller to pick up the new manifest + require.Eventually(t, func() bool { + return manifestFetchCount.Load() >= 2 + }, 5*time.Second, 50*time.Millisecond) + + // 4. After manifest reload, the operation should still be a cache HIT + // because the SHA is the same — no revision in the cache key. res, err = xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ OperationName: []byte(`"Employees"`), Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "` + employeesHash + `"}}`), @@ -508,19 +536,68 @@ func TestPQLManifest(t *testing.T) { }) require.NoError(t, err) require.Equal(t, expectedEmployeesBody, res.Body) + require.Equal(t, "HIT", res.Response.Header.Get(core.PersistedOperationCacheHeader)) + }) + }) - // 3. Swap to manifest v2 (which removes the operation) - currentManifest.Store(manifestV2) + t.Run("manifest warmup serves first request from cache", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(manifestConfigWithWarmup), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + // The very first request should hit ALL caches because the manifest warmup + // pre-processed all operations through the full pipeline at startup. 
+ res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + require.Equal(t, "HIT", res.Response.Header.Get(core.PersistedOperationCacheHeader)) + require.Equal(t, "HIT", res.Response.Header.Get(core.NormalizationCacheHeader)) + require.Equal(t, "HIT", res.Response.Header.Get(core.VariablesNormalizationCacheHeader)) + require.Equal(t, "HIT", res.Response.Header.Get(core.VariablesRemappingCacheHeader)) + require.Equal(t, "HIT", res.Response.Header.Get(core.ExecutionPlanCacheHeader)) + }) + }) + + t.Run("manifest warmup cache hit is independent of client name", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(manifestConfigWithWarmup), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Warmup runs without a client name. Requests from any client should still hit + // all caches because PQL manifest cache keys exclude clientName. + for _, clientName := range []string{"client-a", "client-b", "another-client"} { + header := make(http.Header) + header.Add("graphql-client-name", clientName) - // 4. 
Wait for poller to pick up the new manifest and cache to be invalidated - require.EventuallyWithT(t, func(ct *assert.CollectT) { - res = xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ OperationName: []byte(`"Employees"`), - Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "` + employeesHash + `"}}`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f"}}`), Header: header, }) - assert.Equal(ct, persistedNotFoundResp, res.Body) - }, 5*time.Second, 100*time.Millisecond) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + require.Equal(t, "HIT", res.Response.Header.Get(core.PersistedOperationCacheHeader), + "expected persisted operation cache HIT for client %q", clientName) + require.Equal(t, "HIT", res.Response.Header.Get(core.NormalizationCacheHeader), + "expected normalization cache HIT for client %q", clientName) + require.Equal(t, "HIT", res.Response.Header.Get(core.VariablesNormalizationCacheHeader), + "expected variables normalization cache HIT for client %q", clientName) + require.Equal(t, "HIT", res.Response.Header.Get(core.VariablesRemappingCacheHeader), + "expected variables remapping cache HIT for client %q", clientName) + require.Equal(t, "HIT", res.Response.Header.Get(core.ExecutionPlanCacheHeader), + "expected execution plan cache HIT for client %q", clientName) + } }) }) @@ -583,6 +660,76 @@ func TestPQLManifest(t *testing.T) { }) }) + t.Run("warmup disabled skips cache pre-processing", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + Warmup: config.PQLManifestWarmupConfig{ + Enabled: false, + }, + }, + }), + }, + }, 
func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + // With warmup disabled, the first request should still resolve the persisted operation + // from the manifest, but all processing caches should be cold. + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + require.Equal(t, "MISS", res.Response.Header.Get(core.PersistedOperationCacheHeader)) + require.Equal(t, "MISS", res.Response.Header.Get(core.NormalizationCacheHeader)) + require.Equal(t, "MISS", res.Response.Header.Get(core.ExecutionPlanCacheHeader)) + }) + }) + + t.Run("warmup with custom workers and timeout", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + Warmup: config.PQLManifestWarmupConfig{ + Enabled: true, + Workers: 2, + ItemsPerSecond: 100, + Timeout: 10 * time.Second, + }, + }, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + // With custom warmup config, all caches should still be warm on the first request. 
+ res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + require.Equal(t, "HIT", res.Response.Header.Get(core.PersistedOperationCacheHeader)) + require.Equal(t, "HIT", res.Response.Header.Get(core.NormalizationCacheHeader)) + require.Equal(t, "HIT", res.Response.Header.Get(core.ExecutionPlanCacheHeader)) + }) + }) + t.Run("fails to start when initial CDN manifest fetch fails", func(t *testing.T) { t.Parallel() diff --git a/router/core/cache_warmup.go b/router/core/cache_warmup.go index 67af83b158..9204d4091f 100644 --- a/router/core/cache_warmup.go +++ b/router/core/cache_warmup.go @@ -62,7 +62,7 @@ func WarmupCaches(ctx context.Context, cfg *CacheWarmupConfig) (err error) { if cfg.Timeout <= 0 { w.timeout = time.Second * 30 } - w.log.Info("Warmup started", + w.log.Debug("Warmup started", zap.Int("workers", cfg.Workers), zap.Int("items_per_second", cfg.ItemsPerSecond), zap.Duration("timeout", cfg.Timeout), @@ -84,7 +84,7 @@ func WarmupCaches(ctx context.Context, cfg *CacheWarmupConfig) (err error) { ) return err } - w.log.Info("Warmup completed", + w.log.Debug("Warmup completed", zap.Int("processed_items", completed), zap.Duration("duration", time.Since(start)), ) @@ -123,7 +123,7 @@ func (w *cacheWarmup) run(ctx context.Context) (int, error) { return 0, nil } - w.log.Info("Starting processing", + w.log.Debug("Starting processing", zap.Int("items", len(items)), ) diff --git a/router/core/cache_warmup_manifest.go b/router/core/cache_warmup_manifest.go new file mode 100644 index 0000000000..0a886da6df --- /dev/null +++ b/router/core/cache_warmup_manifest.go @@ -0,0 +1,47 @@ +package core + +import ( + "context" + + nodev1 
"github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" + "github.com/wundergraph/cosmo/router/internal/persistedoperation/pqlmanifest" + "go.uber.org/zap" +) + +var _ CacheWarmupSource = (*ManifestWarmupSource)(nil) + +type ManifestWarmupSource struct { + store *pqlmanifest.Store +} + +func NewManifestWarmupSource(store *pqlmanifest.Store) *ManifestWarmupSource { + return &ManifestWarmupSource{ + store: store, + } +} + +func (s *ManifestWarmupSource) LoadItems(ctx context.Context, log *zap.Logger) ([]*nodev1.Operation, error) { + ops := s.store.AllOperations() + if len(ops) == 0 { + log.Debug("No operations in PQL manifest for warmup") + return nil, nil + } + + items := make([]*nodev1.Operation, 0, len(ops)) + for sha256Hash, body := range ops { + items = append(items, &nodev1.Operation{ + Request: &nodev1.OperationRequest{ + Query: body, + Extensions: &nodev1.Extension{ + PersistedQuery: &nodev1.PersistedQuery{ + Sha256Hash: sha256Hash, + Version: 1, + }, + }, + }, + }) + } + + log.Debug("Loaded PQL manifest operations for warmup", zap.Int("count", len(items))) + return items, nil +} diff --git a/router/core/cache_warmup_manifest_test.go b/router/core/cache_warmup_manifest_test.go new file mode 100644 index 0000000000..05aadbab9a --- /dev/null +++ b/router/core/cache_warmup_manifest_test.go @@ -0,0 +1,86 @@ +package core + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + "github.com/wundergraph/cosmo/router/internal/persistedoperation/pqlmanifest" + "go.uber.org/zap" +) + +func TestManifestWarmupSource(t *testing.T) { + t.Parallel() + + t.Run("returns nil when store has no manifest", func(t *testing.T) { + t.Parallel() + store := pqlmanifest.NewStore(zap.NewNop()) + source := NewManifestWarmupSource(store) + + items, err := source.LoadItems(context.Background(), zap.NewNop()) + require.NoError(t, err) + require.Nil(t, items) + }) + + t.Run("returns nil when manifest has no operations", func(t *testing.T) { + t.Parallel() + 
store := pqlmanifest.NewStore(zap.NewNop()) + store.Load(&pqlmanifest.Manifest{ + Version: 1, + Revision: "rev-1", + Operations: map[string]string{}, + }) + source := NewManifestWarmupSource(store) + + items, err := source.LoadItems(context.Background(), zap.NewNop()) + require.NoError(t, err) + require.Nil(t, items) + }) + + t.Run("returns all operations with persisted query extensions", func(t *testing.T) { + t.Parallel() + store := pqlmanifest.NewStore(zap.NewNop()) + store.Load(&pqlmanifest.Manifest{ + Version: 1, + Revision: "rev-1", + Operations: map[string]string{ + "sha256abc": "query Employees { employees { id } }", + "sha256def": "mutation CreateUser { createUser { id } }", + }, + }) + source := NewManifestWarmupSource(store) + + items, err := source.LoadItems(context.Background(), zap.NewNop()) + require.NoError(t, err) + require.Len(t, items, 2) + + // Collect items into a map for deterministic assertions (map iteration is unordered) + byHash := make(map[string]string) + for _, item := range items { + require.NotNil(t, item.Request) + require.NotNil(t, item.Request.Extensions) + require.NotNil(t, item.Request.Extensions.PersistedQuery) + require.Equal(t, int32(1), item.Request.Extensions.PersistedQuery.Version) + byHash[item.Request.Extensions.PersistedQuery.Sha256Hash] = item.Request.Query + } + + require.Equal(t, "query Employees { employees { id } }", byHash["sha256abc"]) + require.Equal(t, "mutation CreateUser { createUser { id } }", byHash["sha256def"]) + }) + + t.Run("does not include client info", func(t *testing.T) { + t.Parallel() + store := pqlmanifest.NewStore(zap.NewNop()) + store.Load(&pqlmanifest.Manifest{ + Version: 1, + Revision: "rev-1", + Operations: map[string]string{"hash1": "query { a }"}, + }) + source := NewManifestWarmupSource(store) + + items, err := source.LoadItems(context.Background(), zap.NewNop()) + require.NoError(t, err) + require.Len(t, items, 1) + require.Nil(t, items[0].Client) + }) +} diff --git 
a/router/core/graph_server.go b/router/core/graph_server.go index d9e73be51f..94baf88bc0 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -1450,6 +1450,56 @@ func (s *graphServer) buildGraphMux( } } + // Prewarm all persisted operations from the PQL manifest so that the first request is served from cache. + // This runs independently of the cache warmup configuration above. + manifestWarmup := s.persistedOperationsConfig.Manifest.Warmup + if manifestWarmup.Enabled && s.persistedOperationClient != nil { + if pqlStore := s.persistedOperationClient.PQLStore(); pqlStore != nil && pqlStore.IsLoaded() { + manifestProcessor := NewCacheWarmupPlanningProcessor(&CacheWarmupPlanningProcessorOptions{ + OperationProcessor: operationProcessor, + OperationPlanner: operationPlanner, + ComplexityLimits: s.securityConfiguration.ComplexityLimits, + RouterSchema: executor.RouterSchema, + TrackSchemaUsage: s.graphqlMetricsConfig.Enabled, + DisableVariablesRemapping: s.engineExecutionConfiguration.DisableVariablesRemapping, + }) + + manifestWarmupConfig := &CacheWarmupConfig{ + Log: s.logger, + Processor: manifestProcessor, + Workers: manifestWarmup.Workers, + ItemsPerSecond: manifestWarmup.ItemsPerSecond, + Timeout: manifestWarmup.Timeout, + Source: NewManifestWarmupSource(pqlStore), + } + + err = WarmupCaches(ctx, manifestWarmupConfig) + if err != nil { + s.logger.Error("Failed to warmup PQL manifest operations", zap.Error(err)) + } + + // Re-warm when the manifest is updated by the poller. + // The callback runs in a new goroutine to avoid blocking the poll loop. 
+ pqlStore.SetOnUpdate(func() { + rewarmCtx, cancel := context.WithTimeout(context.Background(), manifestWarmup.Timeout) + defer cancel() + + rewarmConfig := &CacheWarmupConfig{ + Log: s.logger, + Processor: manifestProcessor, + Workers: manifestWarmup.Workers, + ItemsPerSecond: manifestWarmup.ItemsPerSecond, + Timeout: manifestWarmup.Timeout, + Source: NewManifestWarmupSource(pqlStore), + } + + if rewarmErr := WarmupCaches(rewarmCtx, rewarmConfig); rewarmErr != nil { + s.logger.Error("Failed to re-warm PQL manifest operations after update", zap.Error(rewarmErr)) + } + }) + } + } + authorizerOptions := &CosmoAuthorizerOptions{ FieldConfigurations: opts.EngineConfig.FieldConfigurations, RejectOperationIfUnauthorized: false, diff --git a/router/core/operation_processor.go b/router/core/operation_processor.go index a7b948b3ea..fafa168a94 100644 --- a/router/core/operation_processor.go +++ b/router/core/operation_processor.go @@ -1241,10 +1241,16 @@ func (o *OperationKit) generatePersistedOperationCacheKey(clientName string, ski // If there are multiple operations in the document, we need to include the operation name in the cache key _, _ = o.kit.keyGen.WriteString(o.parsedOperation.Request.OperationName) } - _, _ = o.kit.keyGen.WriteString(clientName) - // Include manifest revision so cache entries naturally invalidate when the manifest changes - if o.operationProcessor.persistedOperationClient != nil { - _, _ = o.kit.keyGen.WriteString(o.operationProcessor.persistedOperationClient.ManifestRevision()) + manifestEnabled := o.operationProcessor.persistedOperationClient != nil && + o.operationProcessor.persistedOperationClient.ManifestEnabled() + + if !manifestEnabled { + // Non-manifest mode: include clientName since operations are per-client. + // Manifest mode: exclude clientName because manifest operations are global + // and the SHA256 hash already uniquely identifies the operation body. 
+ // Cache entries persist across manifest reloads — removed operations are + // naturally evicted by the LRU. + _, _ = o.kit.keyGen.WriteString(clientName) } o.writeSkipIncludeCacheKeyToKeyGen(skipIncludeVariableNames) sum := o.kit.keyGen.Sum64() diff --git a/router/internal/persistedoperation/client.go b/router/internal/persistedoperation/client.go index 48f48bc9bd..d204161d5e 100644 --- a/router/internal/persistedoperation/client.go +++ b/router/internal/persistedoperation/client.go @@ -127,13 +127,14 @@ func (c *Client) APQEnabled() bool { return c.apqClient != nil && c.apqClient.Enabled() } -// ManifestRevision returns the current PQL manifest revision, or "" if no manifest is loaded. -// Used to include in cache keys so entries naturally invalidate when the manifest changes. -func (c *Client) ManifestRevision() string { - if c.pqlStore == nil { - return "" - } - return c.pqlStore.Revision() +// ManifestEnabled returns whether a PQL manifest is configured and loaded. +func (c *Client) ManifestEnabled() bool { + return c.pqlStore != nil && c.pqlStore.IsLoaded() +} + +// PQLStore returns the PQL manifest store, or nil if no manifest is configured. 
+func (c *Client) PQLStore() *pqlmanifest.Store { + return c.pqlStore } func (c *Client) Close() { diff --git a/router/internal/persistedoperation/pqlmanifest/fetcher_test.go b/router/internal/persistedoperation/pqlmanifest/fetcher_test.go index 5469ffb307..e46631e68b 100644 --- a/router/internal/persistedoperation/pqlmanifest/fetcher_test.go +++ b/router/internal/persistedoperation/pqlmanifest/fetcher_test.go @@ -53,6 +53,7 @@ func newETagCDNHandler(m *Manifest) http.Handler { } func TestFetch_SendsIfNoneMatchHeader(t *testing.T) { + t.Parallel() var receivedHeaders http.Header var receivedMethod string var receivedBody []byte @@ -88,6 +89,7 @@ func TestFetch_SendsIfNoneMatchHeader(t *testing.T) { } func TestFetch_NoIfNoneMatchOnFirstRequest(t *testing.T) { + t.Parallel() var receivedHeaders http.Header m := &Manifest{ @@ -118,6 +120,7 @@ func TestFetch_NoIfNoneMatchOnFirstRequest(t *testing.T) { } func TestFetch_Handles304Response(t *testing.T) { + t.Parallel() server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusNotModified) })) @@ -132,6 +135,7 @@ func TestFetch_Handles304Response(t *testing.T) { } func TestFetch_Handles200WithManifest(t *testing.T) { + t.Parallel() m := &Manifest{ Version: 1, Revision: "rev-456", @@ -153,6 +157,7 @@ func TestFetch_Handles200WithManifest(t *testing.T) { } func TestFetch_ETagRoundTrip(t *testing.T) { + t.Parallel() m := &Manifest{ Version: 1, Revision: "rev-rt", @@ -180,6 +185,7 @@ func TestFetch_ETagRoundTrip(t *testing.T) { } func TestFetch_UsesGETMethod(t *testing.T) { + t.Parallel() var receivedMethod string m := &Manifest{ diff --git a/router/internal/persistedoperation/pqlmanifest/poller_test.go b/router/internal/persistedoperation/pqlmanifest/poller_test.go index 61a85f4366..8c44268c75 100644 --- a/router/internal/persistedoperation/pqlmanifest/poller_test.go +++ b/router/internal/persistedoperation/pqlmanifest/poller_test.go @@ -14,6 +14,7 @@ import ( ) 
func TestPoller_FetchInitial(t *testing.T) { + t.Parallel() m := &Manifest{ Version: 1, Revision: "rev-1", @@ -37,6 +38,7 @@ func TestPoller_FetchInitial(t *testing.T) { } func TestPoller_FetchInitialError(t *testing.T) { + t.Parallel() server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusInternalServerError) })) @@ -51,6 +53,7 @@ func TestPoller_FetchInitialError(t *testing.T) { } func TestPoller_PollUpdatesManifest(t *testing.T) { + t.Parallel() manifestV1 := &Manifest{ Version: 1, Revision: "rev-1", @@ -114,6 +117,7 @@ func TestPoller_PollUpdatesManifest(t *testing.T) { } func TestPoller_PollStopsOnContextCancel(t *testing.T) { + t.Parallel() var fetchCount atomic.Int32 m := &Manifest{ @@ -148,6 +152,7 @@ func TestPoller_PollStopsOnContextCancel(t *testing.T) { } func TestPoller_PollContinuesOnFetchError(t *testing.T) { + t.Parallel() var requestCount atomic.Int32 m := &Manifest{ diff --git a/router/internal/persistedoperation/pqlmanifest/store.go b/router/internal/persistedoperation/pqlmanifest/store.go index ab3577afa3..21f9dcc1c3 100644 --- a/router/internal/persistedoperation/pqlmanifest/store.go +++ b/router/internal/persistedoperation/pqlmanifest/store.go @@ -18,6 +18,7 @@ type Manifest struct { type Store struct { manifest atomic.Pointer[Manifest] + onUpdate atomic.Value // stores func() logger *zap.Logger } @@ -27,9 +28,19 @@ func NewStore(logger *zap.Logger) *Store { } } -// Load swaps the manifest atomically. +// SetOnUpdate registers a callback that is invoked after the manifest is updated via Load. +// The callback is called asynchronously in a new goroutine to avoid blocking the poller. +func (s *Store) SetOnUpdate(fn func()) { + s.onUpdate.Store(fn) +} + +// Load swaps the manifest atomically and invokes the onUpdate callback if set. 
func (s *Store) Load(manifest *Manifest) { s.manifest.Store(manifest) + + if fn, ok := s.onUpdate.Load().(func()); ok && fn != nil { + go fn() + } } // LookupByHash performs an O(1) map lookup by sha256 hash. @@ -114,3 +125,13 @@ func (s *Store) OperationCount() int { } return len(m.Operations) } + +// AllOperations returns all operations from the manifest for iteration (e.g., warmup). +// Returns nil if no manifest is loaded. +func (s *Store) AllOperations() map[string]string { + m := s.manifest.Load() + if m == nil { + return nil + } + return m.Operations +} diff --git a/router/internal/persistedoperation/pqlmanifest/store_test.go b/router/internal/persistedoperation/pqlmanifest/store_test.go index 15002cc9d2..2e1c20c727 100644 --- a/router/internal/persistedoperation/pqlmanifest/store_test.go +++ b/router/internal/persistedoperation/pqlmanifest/store_test.go @@ -1,7 +1,9 @@ package pqlmanifest import ( + "sync/atomic" "testing" + "time" "github.com/stretchr/testify/require" "go.uber.org/zap" @@ -39,4 +41,44 @@ func TestStore(t *testing.T) { require.True(t, found) require.Equal(t, "query { b }", string(body)) }) + + t.Run("AllOperations returns nil when not loaded", func(t *testing.T) { + store := NewStore(zap.NewNop()) + require.Nil(t, store.AllOperations()) + }) + + t.Run("AllOperations returns all operations", func(t *testing.T) { + store := NewStore(zap.NewNop()) + ops := map[string]string{ + "hash1": "query { a }", + "hash2": "query { b }", + "hash3": "mutation { c }", + } + store.Load(&Manifest{Version: 1, Revision: "rev-1", Operations: ops}) + + result := store.AllOperations() + require.Equal(t, ops, result) + }) + + t.Run("SetOnUpdate callback is invoked on Load", func(t *testing.T) { + store := NewStore(zap.NewNop()) + + var called atomic.Bool + store.SetOnUpdate(func() { + called.Store(true) + }) + + store.Load(&Manifest{Version: 1, Revision: "rev-1", Operations: map[string]string{"a": "query { a }"}}) + + // Callback runs in a goroutine, wait briefly + 
require.Eventually(t, func() bool { + return called.Load() + }, time.Second, 10*time.Millisecond) + }) + + t.Run("SetOnUpdate not called when no callback set", func(t *testing.T) { + store := NewStore(zap.NewNop()) + // Should not panic + store.Load(&Manifest{Version: 1, Revision: "rev-1", Operations: map[string]string{"a": "query { a }"}}) + }) } diff --git a/router/pkg/authentication/oidc_discovery_client.go b/router/pkg/authentication/oidc_discovery_client.go index 75e403d4bf..9bbfc924ad 100644 --- a/router/pkg/authentication/oidc_discovery_client.go +++ b/router/pkg/authentication/oidc_discovery_client.go @@ -2,6 +2,7 @@ package authentication import ( "encoding/json" + "io" "net/http" "strings" ) @@ -45,6 +46,7 @@ func (c *oidcDiscoveryClient) RoundTrip(req *http.Request) (*http.Response, erro } defer func() { + _, _ = io.Copy(io.Discard, resp.Body) _ = resp.Body.Close() }() diff --git a/router/pkg/config/config.go b/router/pkg/config/config.go index 25944ee20b..3b684b4523 100644 --- a/router/pkg/config/config.go +++ b/router/pkg/config/config.go @@ -972,10 +972,18 @@ type AutomaticPersistedQueriesCacheConfig struct { TTL int `yaml:"ttl" env:"APQ_CACHE_TTL" envDefault:"-1"` } +type PQLManifestWarmupConfig struct { + Enabled bool `yaml:"enabled" envDefault:"true" env:"ENABLED"` + Workers int `yaml:"workers" envDefault:"4" env:"WORKERS"` + ItemsPerSecond int `yaml:"items_per_second" envDefault:"50" env:"ITEMS_PER_SECOND"` + Timeout time.Duration `yaml:"timeout" envDefault:"30s" env:"TIMEOUT"` +} + type PQLManifestConfig struct { - Enabled bool `yaml:"enabled" envDefault:"false" env:"ENABLED"` - PollInterval time.Duration `yaml:"poll_interval" envDefault:"10s" env:"POLL_INTERVAL"` - PollJitter time.Duration `yaml:"poll_jitter" envDefault:"5s" env:"POLL_JITTER"` + Enabled bool `yaml:"enabled" envDefault:"false" env:"ENABLED"` + PollInterval time.Duration `yaml:"poll_interval" envDefault:"10s" env:"POLL_INTERVAL"` + PollJitter time.Duration `yaml:"poll_jitter" 
envDefault:"5s" env:"POLL_JITTER"` + Warmup PQLManifestWarmupConfig `yaml:"warmup" envPrefix:"WARMUP_"` } type PersistedOperationsConfig struct { @@ -1172,7 +1180,7 @@ type Config struct { Modules map[string]interface{} `yaml:"modules,omitempty"` Headers HeaderRules `yaml:"headers,omitempty"` - TrafficShaping TrafficShapingRules `yaml:"traffic_shaping,omitempty"` + TrafficShaping TrafficShapingRules `yaml:"traffic_shaping,omitempty" envPrefix:"TRAFFIC_SHAPING_"` FileUpload FileUpload `yaml:"file_upload,omitempty"` AccessLogs AccessLogsConfig `yaml:"access_logs,omitempty"` Batching BatchingConfig `yaml:"batching,omitempty"` diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index c67a6c7700..5d3f13c12f 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -220,6 +220,39 @@ "duration": { "minimum": "1s" } + }, + "warmup": { + "type": "object", + "additionalProperties": false, + "description": "Configuration for cache warmup of PQL manifest operations. When enabled, the router pre-processes all operations from the manifest on startup and after manifest updates.", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable cache warmup for PQL manifest operations.", + "default": true + }, + "workers": { + "type": "integer", + "description": "The number of concurrent workers used to warm up the cache.", + "default": 4, + "minimum": 1 + }, + "items_per_second": { + "type": "integer", + "description": "Rate limit for items processed per second. Set to 0 for unlimited throughput.", + "default": 50, + "minimum": 0 + }, + "timeout": { + "type": "string", + "format": "go-duration", + "description": "The maximum time allowed for the warmup to complete. The period is specified as a string with a number and a unit, e.g. 
10s, 1m, 5m.", + "default": "30s", + "duration": { + "minimum": "1s" + } + } + } } } } diff --git a/router/pkg/config/config_test.go b/router/pkg/config/config_test.go index 678324a0ef..d93103fb9e 100644 --- a/router/pkg/config/config_test.go +++ b/router/pkg/config/config_test.go @@ -1000,6 +1000,11 @@ func TestConfigMerging(t *testing.T) { Manifest: PQLManifestConfig{ PollInterval: 10 * time.Second, PollJitter: 5 * time.Second, + Warmup: PQLManifestWarmupConfig{ + Enabled: true, + Workers: 4, + Timeout: 30 * time.Second, + }, }, }, AutomaticPersistedQueries: AutomaticPersistedQueriesConfig{ diff --git a/router/pkg/config/testdata/config_defaults.json b/router/pkg/config/testdata/config_defaults.json index c970e1ec9b..2c845745b3 100644 --- a/router/pkg/config/testdata/config_defaults.json +++ b/router/pkg/config/testdata/config_defaults.json @@ -546,7 +546,13 @@ "Manifest": { "Enabled": false, "PollInterval": 10000000000, - "PollJitter": 5000000000 + "PollJitter": 5000000000, + "Warmup": { + "Enabled": true, + "Workers": 4, + "ItemsPerSecond": 50, + "Timeout": 30000000000 + } } }, "AutomaticPersistedQueries": { diff --git a/router/pkg/config/testdata/config_full.json b/router/pkg/config/testdata/config_full.json index 31380c18fe..d8c5f76db2 100644 --- a/router/pkg/config/testdata/config_full.json +++ b/router/pkg/config/testdata/config_full.json @@ -981,7 +981,13 @@ "Manifest": { "Enabled": true, "PollInterval": 30000000000, - "PollJitter": 10000000000 + "PollJitter": 10000000000, + "Warmup": { + "Enabled": true, + "Workers": 4, + "ItemsPerSecond": 50, + "Timeout": 30000000000 + } } }, "AutomaticPersistedQueries": {