diff --git a/cdn-server/cdn/src/index.ts b/cdn-server/cdn/src/index.ts
index 02c4a4bc8f..8fc2e5c17c 100644
--- a/cdn-server/cdn/src/index.ts
+++ b/cdn-server/cdn/src/index.ts
@@ -26,12 +26,12 @@ export interface BlobStorage {
   headObject({
     context,
     key,
-    schemaVersionId,
+    version,
   }: {
     context: Context;
     abortSignal?: AbortSignal;
     key: string;
-    schemaVersionId: string;
+    version: string;
   }): Promise<boolean>;
 }
@@ -106,6 +106,9 @@ const jwtMiddleware = (secret: string | ((c: Context) => string)) => {
   };
 };

+// Deprecated: Individual persisted operation lookups via CDN are deprecated.
+// The router now downloads all operations at once via the PQL manifest, avoiding
+// per-request latency. This handler is kept for backward compatibility with older routers.
 const persistedOperation = (storage: BlobStorage) => {
   return async (c: Context) => {
     const organizationId = c.get('authenticatedOrganizationId');
@@ -165,7 +168,7 @@ const latestValidRouterConfig = (storage: BlobStorage) => {
     // starts for the first time, and we need to return a config anyway.
     if (body?.version) {
       try {
-        isModified = await storage.headObject({ context: c, key, schemaVersionId: body.version });
+        isModified = await storage.headObject({ context: c, key, version: body.version });
       } catch (e: any) {
         if (e instanceof BlobNotFoundError) {
           return c.notFound();
@@ -262,6 +265,60 @@ const cacheOperations = (storage: BlobStorage) => {
   };
 };

+const persistedOperationsManifest = (storage: BlobStorage) => {
+  return async (c: Context) => {
+    const organizationId = c.get('authenticatedOrganizationId');
+    const federatedGraphId = c.get('authenticatedFederatedGraphId');
+
+    if (organizationId !== c.req.param('organization_id') || federatedGraphId !== c.req.param('federated_graph_id')) {
+      return c.text('Bad Request', 400);
+    }
+
+    const key = `${organizationId}/${federatedGraphId}/operations/manifest.json`;
+
+    const ifNoneMatch = c.req.header('If-None-Match');
+    const clientVersion = ifNoneMatch?.replace(/^"(.*)"$/, '$1') || null;
+
+    let isModified = true;
+
+    if (clientVersion) {
+      try {
+        isModified = await storage.headObject({ context: c, key, version: clientVersion });
+      } catch (e: any) {
+        if (e instanceof BlobNotFoundError) {
+          return c.notFound();
+        }
+        throw e;
+      }
+    }
+
+    if (!isModified) {
+      c.header('ETag', `"${clientVersion}"`);
+      return c.body(null, 304);
+    }
+
+    let blobObject: BlobObject;
+
+    try {
+      blobObject = await storage.getObject({ context: c, key, cacheControl: 'no-cache' });
+    } catch (e: any) {
+      if (e instanceof BlobNotFoundError) {
+        return c.notFound();
+      }
+      throw e;
+    }
+
+    if (blobObject.metadata?.version) {
+      c.header('ETag', `"${blobObject.metadata.version}"`);
+    }
+    c.header('Content-Type', 'application/json; charset=UTF-8');
+
+    return stream(c, async (stream) => {
+      await stream.pipe(blobObject.stream);
+    });
+  };
+};
+
 const subgraphChecks = (storage: BlobStorage) => {
   return async (c: Context) => {
     const organizationId = c.get('authenticatedOrganizationId');
@@ -301,6 +358,11 @@ export const cdn = , opts: CdnOptions, ) => {
+  const manifestPath = '/:organization_id/:federated_graph_id/operations/manifest.json';
+  hono
+    .use(manifestPath, jwtMiddleware(opts.authJwtSecret))
+    .get(manifestPath, persistedOperationsManifest(opts.blobStorage));
+
   const operations = '/:organization_id/:federated_graph_id/operations/:client_id/:operation{.+\\.json$}';
   const latestValidRouterConfigs = '/:organization_id/:federated_graph_id/routerconfigs/latest.json';
   hono.use(operations,
jwtMiddleware(opts.authJwtSecret)).get(operations, persistedOperation(opts.blobStorage)); diff --git a/cdn-server/cdn/test/cdn.test.ts b/cdn-server/cdn/test/cdn.test.ts index 6a936930af..9e2eec6e87 100644 --- a/cdn-server/cdn/test/cdn.test.ts +++ b/cdn-server/cdn/test/cdn.test.ts @@ -30,12 +30,12 @@ class InMemoryBlobStorage implements BlobStorage { return Promise.resolve({ stream, metadata: obj.metadata }); } - headObject({ key, schemaVersionId }: { key: string; schemaVersionId: string }): Promise { + headObject({ key, version }: { key: string; version: string }): Promise { const obj = this.objects.get(key); if (!obj) { return Promise.reject(new BlobNotFoundError(`Object with key ${key} not found`)); } - if (obj.metadata?.version === schemaVersionId) { + if (obj.metadata?.version === version) { return Promise.resolve(false); } return Promise.resolve(true); @@ -554,6 +554,207 @@ describe('CDN handlers', () => { }); }); + describe('Test persisted operations manifest handler', async () => { + const federatedGraphId = 'federatedGraphId'; + const organizationId = 'organizationId'; + const token = await generateToken(organizationId, federatedGraphId, secretKey); + const blobStorage = new InMemoryBlobStorage(); + const requestPath = `/${organizationId}/${federatedGraphId}/operations/manifest.json`; + + const app = new Hono(); + + cdn(app, { + authJwtSecret: secretKey, + authAdmissionJwtSecret: secretAdmissionKey, + blobStorage, + }); + + test('it returns a 401 if no Authorization header is provided', async () => { + const res = await app.request(requestPath, { + method: 'GET', + }); + expect(res.status).toBe(401); + }); + + test('it returns a 401 if an invalid Authorization header is provided', async () => { + const res = await app.request(requestPath, { + method: 'GET', + headers: { + Authorization: `Bearer ${token.slice(0, -1)}}`, + }, + }); + expect(res.status).toBe(401); + }); + + test('it returns a 400 if the graph or organization ids does not match with the JWT payload', async () => { + const res = await app.request(`/foo/bar/operations/manifest.json`, { + method: 'GET', + headers: { + Authorization: `Bearer ${token}`, + }, + }); + expect(res.status).toBe(400); + }); + + test('it returns a 401 if the token has expired', async () => { + const token = await new SignJWT({ + organization_id: organizationId, + federated_graph_id: federatedGraphId, + exp: Math.floor(Date.now() / 1000) - 60, + }) + .setProtectedHeader({ alg: 'HS256' }) + .sign(new TextEncoder().encode(secretKey)); + const res = await app.request(requestPath, { + method: 'GET', + headers: { + Authorization: `Bearer ${token}`, + }, + }); + expect(res.status).toBe(401); + }); + + test('it returns the manifest with ETag on first request', async () => { + const manifestContents = JSON.stringify({ + version: 1, + revision: 'abc123', + generatedAt: '2025-01-01T00:00:00.000Z', + operations: { + sha256hash1: 'query { hello }', + }, + }); + + blobStorage.objects.set(`${organizationId}/${federatedGraphId}/operations/manifest.json`, { + buffer: Buffer.from(manifestContents), + metadata: { version: 'abc123' }, + }); + + const res = await app.request(requestPath, { + method: 'GET', + headers: { + Authorization: `Bearer ${token}`, + }, + }); + expect(res.status).toBe(200); + expect(res.headers.get('Content-Type')).toBe('application/json; charset=UTF-8'); + expect(res.headers.get('ETag')).toBe('"abc123"'); + expect(await res.text()).toBe(manifestContents); + }); + + test('it returns 304 with ETag when If-None-Match matches', async () => { + 
blobStorage.objects.set(`${organizationId}/${federatedGraphId}/operations/manifest.json`, { + buffer: Buffer.from(JSON.stringify({ version: 1, revision: 'abc123', operations: {} })), + metadata: { version: 'abc123' }, + }); + + const res = await app.request(requestPath, { + method: 'GET', + headers: { + Authorization: `Bearer ${token}`, + 'If-None-Match': '"abc123"', + }, + }); + expect(res.status).toBe(304); + expect(res.headers.get('ETag')).toBe('"abc123"'); + }); + + test('it returns 200 with new ETag when If-None-Match does not match', async () => { + const manifestContents = JSON.stringify({ + version: 1, + revision: 'def456', + generatedAt: '2025-01-01T00:00:00.000Z', + operations: { + sha256hash1: 'query { hello }', + }, + }); + + blobStorage.objects.set(`${organizationId}/${federatedGraphId}/operations/manifest.json`, { + buffer: Buffer.from(manifestContents), + metadata: { version: 'def456' }, + }); + + const res = await app.request(requestPath, { + method: 'GET', + headers: { + Authorization: `Bearer ${token}`, + 'If-None-Match': '"old-revision"', + }, + }); + expect(res.status).toBe(200); + expect(res.headers.get('ETag')).toBe('"def456"'); + expect(await res.text()).toBe(manifestContents); + }); + + test('ETag round-trip: fetch returns ETag, re-fetch with that ETag returns 304', async () => { + const manifestContents = JSON.stringify({ + version: 1, + revision: 'rev-round-trip', + generatedAt: '2025-01-01T00:00:00.000Z', + operations: { hash1: 'query { hello }' }, + }); + + blobStorage.objects.set(`${organizationId}/${federatedGraphId}/operations/manifest.json`, { + buffer: Buffer.from(manifestContents), + metadata: { version: 'rev-round-trip' }, + }); + + // First request: no ETag, should get 200 with ETag + const res1 = await app.request(requestPath, { + method: 'GET', + headers: { Authorization: `Bearer ${token}` }, + }); + expect(res1.status).toBe(200); + const etag = res1.headers.get('ETag'); + expect(etag).toBe('"rev-round-trip"'); + expect(await res1.text()).toBe(manifestContents); + + // Second request: send ETag back as If-None-Match, should get 304 + const res2 = await app.request(requestPath, { + method: 'GET', + headers: { + Authorization: `Bearer ${token}`, + 'If-None-Match': etag!, + }, + }); + expect(res2.status).toBe(304); + expect(res2.headers.get('ETag')).toBe(etag); + }); + + test('it returns a 404 if the manifest does not exist', async () => { + const otherBlobStorage = new InMemoryBlobStorage(); + const otherApp = new Hono(); + + cdn(otherApp, { + authJwtSecret: secretKey, + authAdmissionJwtSecret: secretAdmissionKey, + blobStorage: otherBlobStorage, + }); + + const res = await otherApp.request(requestPath, { + method: 'GET', + headers: { + Authorization: `Bearer ${token}`, + }, + }); + expect(res.status).toBe(404); + }); + + test('it does not conflict with the individual persisted operations route', async () => { + const operationContents = JSON.stringify({ version: 1, body: 'query { hello }' }); + blobStorage.objects.set(`${organizationId}/${federatedGraphId}/operations/clientName/operation.json`, { + buffer: Buffer.from(operationContents), + }); + + const res = await app.request(`/${organizationId}/${federatedGraphId}/operations/clientName/operation.json`, { + method: 'GET', + headers: { + Authorization: `Bearer ${token}`, + }, + }); + expect(res.status).toBe(200); + expect(await res.text()).toBe(operationContents); + }); + }); + describe('schema check extensions handler', async () => { const organizationId = 'organizationId'; const checkId = 
randomUUID();
diff --git a/cdn-server/src/s3.ts b/cdn-server/src/s3.ts
index 8c3db88334..81635e78d2 100644
--- a/cdn-server/src/s3.ts
+++ b/cdn-server/src/s3.ts
@@ -49,15 +49,7 @@ class S3BlobStorage implements BlobStorage {
     }
   }

-  async headObject({
-    context,
-    key,
-    schemaVersionId,
-  }: {
-    context: Context;
-    key: string;
-    schemaVersionId: string;
-  }): Promise<boolean> {
+  async headObject({ context, key, version }: { context: Context; key: string; version: string }): Promise<boolean> {
     const command = new HeadObjectCommand({
       Bucket: this.bucketName,
       Key: key,
@@ -72,7 +64,7 @@
     } else if (resp.$metadata.httpStatusCode !== 200) {
       throw new Error(`Failed to fetch the metadata of the object.`);
     }
-    if (resp.Metadata && resp.Metadata.version === schemaVersionId) {
+    if (resp.Metadata && resp.Metadata.version === version) {
       return false;
     }
     return true;
diff --git a/controlplane/src/core/bufservices/persisted-operation/deletePersistedOperation.ts b/controlplane/src/core/bufservices/persisted-operation/deletePersistedOperation.ts
index 4c74c270a5..59baf9e45b 100644
--- a/controlplane/src/core/bufservices/persisted-operation/deletePersistedOperation.ts
+++ b/controlplane/src/core/bufservices/persisted-operation/deletePersistedOperation.ts
@@ -5,12 +5,10 @@
 import type {
   DeletePersistedOperationRequest,
   DeletePersistedOperationResponse,
 } from '@wundergraph/cosmo-connect/dist/platform/v1/platform_pb';
-import type { BlobStorage } from '../../blobstorage/index.js';
 import { FederatedGraphRepository } from '../../repositories/FederatedGraphRepository.js';
 import { UnauthorizedError } from '../../errors/errors.js';
 import { OperationsRepository } from '../../repositories/OperationsRepository.js';
 import type { RouterOptions } from '../../routes.js';
-import type { PersistedOperationWithClientDTO } from '../../../types/index.js';
 import { enrichLogger, getLogger, handleError } from '../../util.js';
 import { createBlobStoragePath } from './utils.js';

@@ -76,22 +74,7 @@ export function deletePersistedOperation(
     });

     try {
-      await opts.blobStorage.deleteObject({
-        key: path,
-      });
-      return {
-        response: {
-          code: EnumStatusCode.OK,
-        },
-        operation: deletedOperation
-          ? {
-              id: deletedOperation.id,
-              operationId: deletedOperation.operationId,
-              clientName: deletedOperation.clientName,
-              operationNames: deletedOperation.operationNames,
-            }
-          : undefined,
-      };
+      await opts.blobStorage.deleteObject({ key: path });
     } catch (e) {
       const error = e instanceof Error ? e : new Error('Unknown error');
       logger.error(error, `Could not delete operation for ${operation.operationId} at ${path}`);
@@ -103,5 +86,33 @@
         },
       };
     }
+
+    try {
+      await operationsRepo.generateAndUploadManifest({
+        organizationId: authContext.organizationId,
+        blobStorage: opts.blobStorage,
+        logger,
+      });
+    } catch (e) {
+      const error = e instanceof Error ? e : new Error('Unknown error');
+      logger.error(error, `Failed to regenerate PQL manifest after deleting operation ${operation.operationId}`, {
+        federatedGraphId: federatedGraph.id,
+        organizationId: authContext.organizationId,
+      });
+    }
+
+    return {
+      response: {
+        code: EnumStatusCode.OK,
+      },
+      operation: deletedOperation
+        ?
{ + id: deletedOperation.id, + operationId: deletedOperation.operationId, + clientName: deletedOperation.clientName, + operationNames: deletedOperation.operationNames, + } + : undefined, + }; }); } diff --git a/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts b/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts index 1b646ed281..f4e24ceebd 100644 --- a/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts +++ b/controlplane/src/core/bufservices/persisted-operation/publishPersistedOperations.ts @@ -14,7 +14,7 @@ import { buildASTSchema as graphQLBuildASTSchema, DocumentNode, parse, validate import { PublishedOperationData, UpdatedPersistedOperation } from '../../../types/index.js'; import { FederatedGraphRepository } from '../../repositories/FederatedGraphRepository.js'; import { DefaultNamespace } from '../../repositories/NamespaceRepository.js'; -import { OperationsRepository } from '../../repositories/OperationsRepository.js'; +import { MAX_MANIFEST_OPERATIONS, OperationsRepository } from '../../repositories/OperationsRepository.js'; import type { RouterOptions } from '../../routes.js'; import { enrichLogger, extractOperationNames, getLogger, handleError } from '../../util.js'; import { UnauthorizedError } from '../../errors/errors.js'; @@ -158,6 +158,24 @@ export function publishPersistedOperations( operationsResult.map((op) => [op.operationId, { hash: op.hash, operationNames: op.operationNames }]), ); + // Check if adding new operations would exceed the manifest limit + const allExistingOperations = await operationsRepo.getAllPersistedOperationsForGraph(); + const existingHashes = new Set(allExistingOperations.map((op) => op.hash)); + const newOperationCount = req.operations.filter((op) => { + const hash = crypto.createHash('sha256').update(op.contents).digest('hex'); + return !existingHashes.has(hash); + }).length; + + if (allExistingOperations.length + newOperationCount > MAX_MANIFEST_OPERATIONS) { + return { + response: { + code: EnumStatusCode.ERR, + details: `Operation limit exceeded: adding ${newOperationCount} new operations would bring the total to ${allExistingOperations.length + newOperationCount}, which exceeds the maximum of ${MAX_MANIFEST_OPERATIONS} operations per graph`, + }, + operations: [], + }; + } + const processOperation = async ( operation: PersistedOperation, ): Promise<{ @@ -202,6 +220,9 @@ export function publishPersistedOperations( version: 1, body: operation.contents, }; + // Deprecated: Uploading individual operations to blob storage is deprecated. + // The router now downloads all operations at once via the PQL manifest, avoiding + // per-request CDN latency. This upload is kept for backward compatibility with older routers. try { await opts.blobStorage.putObject({ key: path, @@ -263,6 +284,20 @@ export function publishPersistedOperations( await operationsRepo.updatePersistedOperations(clientId, userId, updatedOperations); + try { + await operationsRepo.generateAndUploadManifest({ + organizationId, + blobStorage: opts.blobStorage, + logger, + }); + } catch (e) { + const error = e instanceof Error ? 
e : new Error('Unknown error'); + logger.error(error, 'Failed to regenerate PQL manifest after publishing persisted operations', { + federatedGraphId: federatedGraph.id, + organizationId, + }); + } + return { response: { code: EnumStatusCode.OK, diff --git a/controlplane/src/core/bufservices/persisted-operation/utils.ts b/controlplane/src/core/bufservices/persisted-operation/utils.ts index d58ce64da5..b814115eb3 100644 --- a/controlplane/src/core/bufservices/persisted-operation/utils.ts +++ b/controlplane/src/core/bufservices/persisted-operation/utils.ts @@ -9,3 +9,11 @@ export const createBlobStoragePath = ({ clientName: string; operationId: string; }): string => `${organizationId}/${fedGraphId}/operations/${clientName}/${operationId}.json`; + +export const createManifestBlobStoragePath = ({ + organizationId, + fedGraphId, +}: { + organizationId: string; + fedGraphId: string; +}): string => `${organizationId}/${fedGraphId}/operations/manifest.json`; diff --git a/controlplane/src/core/repositories/OperationsRepository.ts b/controlplane/src/core/repositories/OperationsRepository.ts index 2cb3e97b98..ab999bc3f8 100644 --- a/controlplane/src/core/repositories/OperationsRepository.ts +++ b/controlplane/src/core/repositories/OperationsRepository.ts @@ -1,10 +1,14 @@ +import crypto from 'node:crypto'; import { OverrideChange } from '@wundergraph/cosmo-connect/dist/platform/v1/platform_pb'; import { aliasedTable, and, asc, count, desc, eq, isNull, sql } from 'drizzle-orm'; import { PostgresJsDatabase } from 'drizzle-orm/postgres-js'; import { PlainMessage } from '@bufbuild/protobuf'; +import { FastifyBaseLogger } from 'fastify'; import { DBSchemaChangeType } from '../../db/models.js'; import * as schema from '../../db/schema.js'; import { federatedGraphClients, federatedGraphPersistedOperations, users } from '../../db/schema.js'; +import type { BlobStorage } from '../blobstorage/index.js'; +import { createManifestBlobStoragePath } from '../bufservices/persisted-operation/utils.js'; import { ClientDTO, PersistedOperationDTO, @@ -15,6 +19,15 @@ import { } from '../../types/index.js'; import { SchemaCheckRepository } from './SchemaCheckRepository.js'; +export const MAX_MANIFEST_OPERATIONS = 3000; + +export interface PQLManifest { + version: 1; + revision: string; + generatedAt: string; + operations: Record; // sha256 hash -> operation body +} + type ChangeOverride = IgnoreAllOverride & { changeType: DBSchemaChangeType; path: string | null; @@ -227,6 +240,38 @@ export class OperationsRepository { return result!.id; } + public async getAllPersistedOperationsForGraph(): Promise< + Array<{ + hash: string; + operationContent: string; + operationId: string; + operationNames: string[]; + clientName: string; + }> + > { + const results = await this.db + .select({ + hash: federatedGraphPersistedOperations.hash, + operationContent: federatedGraphPersistedOperations.operationContent, + operationId: federatedGraphPersistedOperations.operationId, + operationNames: federatedGraphPersistedOperations.operationNames, + clientName: federatedGraphClients.name, + }) + .from(federatedGraphPersistedOperations) + .innerJoin(federatedGraphClients, eq(federatedGraphClients.id, federatedGraphPersistedOperations.clientId)) + .where(eq(federatedGraphPersistedOperations.federatedGraphId, this.federatedGraphId)); + + return results + .filter((r) => r.operationContent != null) + .map((r) => ({ + hash: r.hash, + operationContent: r.operationContent!, + operationId: r.operationId, + operationNames: r.operationNames ?? 
[], + clientName: r.clientName, + })); + } + public async getRegisteredClients(): Promise { const fedGraphClients = await this.db.query.federatedGraphClients.findMany({ where: eq(federatedGraphClients.federatedGraphId, this.federatedGraphId), @@ -517,6 +562,57 @@ export class OperationsRepository { }; } + public async generateAndUploadManifest(params: { + organizationId: string; + blobStorage: BlobStorage; + logger: FastifyBaseLogger; + }): Promise<{ revision: string; operationCount: number }> { + const { organizationId, blobStorage, logger } = params; + + const allOperations = await this.getAllPersistedOperationsForGraph(); + + if (allOperations.length === 0) { + logger.warn( + { federatedGraphId: this.federatedGraphId }, + 'No persisted operations with content found for manifest generation', + ); + } + + const operations: Record = {}; + for (const op of allOperations) { + operations[op.operationId] = op.operationContent; + } + + // Compute revision as SHA256 of the deterministic JSON serialization (sorted keys) + const sortedKeys = Object.keys(operations).sort(); + const sortedOperations: Record = {}; + for (const key of sortedKeys) { + sortedOperations[key] = operations[key]; + } + const serialized = JSON.stringify(sortedOperations); + const revision = crypto.createHash('sha256').update(serialized).digest('hex'); + + const manifest: PQLManifest = { + version: 1, + revision, + generatedAt: new Date().toISOString(), + operations: sortedOperations, + }; + + const path = createManifestBlobStoragePath({ organizationId, fedGraphId: this.federatedGraphId }); + + await blobStorage.putObject({ + key: path, + body: Buffer.from(JSON.stringify(manifest), 'utf8'), + contentType: 'application/json; charset=utf-8', + metadata: { version: revision }, + }); + + logger.debug({ revision, operationCount: allOperations.length, path }, 'PQL manifest generated and uploaded'); + + return { revision, operationCount: allOperations.length }; + } + private static createPersistedOperationDTO({ id, operationId, diff --git a/controlplane/test/persisted-operations.test.ts b/controlplane/test/persisted-operations.test.ts index ce9e735235..ee2b0c700d 100644 --- a/controlplane/test/persisted-operations.test.ts +++ b/controlplane/test/persisted-operations.test.ts @@ -1,7 +1,10 @@ +import crypto from 'node:crypto'; import { EnumStatusCode } from '@wundergraph/cosmo-connect/dist/common/common_pb'; import { joinLabel } from '@wundergraph/cosmo-shared'; import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, test, vi, type Mock } from 'vitest'; import { ClickHouseClient } from '../src/core/clickhouse/index.js'; +import { FederatedGraphRepository } from '../src/core/repositories/FederatedGraphRepository.js'; +import { MAX_MANIFEST_OPERATIONS, OperationsRepository } from '../src/core/repositories/OperationsRepository.js'; import { afterAllSetup, beforeAllSetup, @@ -241,14 +244,17 @@ describe('Persisted operations', (ctx) => { expect(publishOperationsResp.response?.code).toBe(EnumStatusCode.OK); const storageKeys = blobStorage.keys(); - expect(storageKeys.length).toBe(2); - const keyComponents = storageKeys[1].split('/'); + // 3 keys: routerconfig + operation + manifest + expect(storageKeys.length).toBe(3); + const operationKey = storageKeys.find((key) => key.includes(`/${id}.json`)); + expect(operationKey).toBeDefined(); + const keyComponents = operationKey!.split('/'); const keyFilename = keyComponents.at(-1)!; const keyBasename = keyFilename.split('.')[0]; expect(keyBasename).toBe(id); const blobObject = await 
blobStorage.getObject({ - key: storageKeys[1], + key: operationKey!, }); const text = await new Response(blobObject.stream).text(); expect(JSON.parse(text)).toEqual({ version: 1, body: query }); @@ -278,18 +284,20 @@ describe('Persisted operations', (ctx) => { expect(publishOperationsResp.response?.code).toBe(EnumStatusCode.OK); const storageKeys = blobStorage.keys(); - expect(storageKeys.length).toBe(2); + expect(storageKeys.length).toBe(3); // The client name should be escaped in the storage key - expect(storageKeys[1]).toContain(encodeURIComponent(clientName)); + const operationKey = storageKeys.find((key) => key.includes(`/${id}.json`)); + expect(operationKey).toBeDefined(); + expect(operationKey).toContain(encodeURIComponent(clientName)); - const keyComponents = storageKeys[1].split('/'); + const keyComponents = operationKey!.split('/'); const keyFilename = keyComponents.at(-1)!; const keyBasename = keyFilename.split('.')[0]; expect(keyBasename).toBe(id); const blobObject = await blobStorage.getObject({ - key: storageKeys[1], + key: operationKey!, }); const text = await new Response(blobObject.stream).text(); expect(JSON.parse(text)).toEqual({ version: 1, body: query }); @@ -325,7 +333,7 @@ describe('Persisted operations', (ctx) => { expect(publishOperationsResp.response?.code).toBe(EnumStatusCode.OK); - expect(blobStorage.keys().length).toBe(2); + expect(blobStorage.keys().length).toBe(3); const deleteFederatedGraphResp = await client.deleteFederatedGraph({ name: fedGraphName, @@ -539,6 +547,230 @@ describe('Persisted operations', (ctx) => { }); }); + describe('manifest generation', () => { + test('Should generate a PQL manifest after publishing persisted operations', async (testContext) => { + const { client, server, blobStorage } = await SetupTest({ + dbname, + chClient, + }); + testContext.onTestFinished(() => server.close()); + + const fedGraphName = genID('fedGraph'); + await setupFederatedGraph(fedGraphName, client); + + const query = `query { hello }`; + + const publishOperationsResp = await client.publishPersistedOperations({ + fedGraphName, + namespace: 'default', + clientName: 'test-client', + operations: [{ id: genID('hello'), contents: query }], + }); + + expect(publishOperationsResp.response?.code).toBe(EnumStatusCode.OK); + + const storageKeys = blobStorage.keys(); + const manifestKey = storageKeys.find((key) => key.endsWith('/operations/manifest.json')); + expect(manifestKey).toBeDefined(); + + const blobObject = await blobStorage.getObject({ key: manifestKey! 
}); + const text = await new Response(blobObject.stream).text(); + const manifest = JSON.parse(text); + + expect(manifest.version).toBe(1); + expect(manifest.revision).toBeDefined(); + expect(manifest.generatedAt).toBeDefined(); + expect(Object.keys(manifest.operations).length).toBe(1); + + const entry = Object.values(manifest.operations)[0] as string; + expect(entry).toBe(query); + }); + + test('Should include operations from multiple clients in the manifest', async (testContext) => { + const { client, server, blobStorage } = await SetupTest({ + dbname, + chClient, + }); + testContext.onTestFinished(() => server.close()); + + const fedGraphName = genID('fedGraph'); + await setupFederatedGraph(fedGraphName, client); + + const queryA = `query { hello }`; + const queryB = `query { __typename }`; + + const publishResp1 = await client.publishPersistedOperations({ + fedGraphName, + namespace: 'default', + clientName: 'client-a', + operations: [{ id: genID('op1'), contents: queryA }], + }); + expect(publishResp1.response?.code).toBe(EnumStatusCode.OK); + + const publishResp2 = await client.publishPersistedOperations({ + fedGraphName, + namespace: 'default', + clientName: 'client-b', + operations: [{ id: genID('op2'), contents: queryB }], + }); + expect(publishResp2.response?.code).toBe(EnumStatusCode.OK); + + const storageKeys = blobStorage.keys(); + const manifestKey = storageKeys.find((key) => key.endsWith('/operations/manifest.json')); + expect(manifestKey).toBeDefined(); + + const blobObject = await blobStorage.getObject({ key: manifestKey! }); + const text = await new Response(blobObject.stream).text(); + const manifest = JSON.parse(text); + + expect(Object.keys(manifest.operations).length).toBe(2); + + const bodies = Object.values(manifest.operations) as string[]; + expect(bodies).toContain(queryA); + expect(bodies).toContain(queryB); + }); + + test('Should regenerate the manifest after deleting a persisted operation', async (testContext) => { + const { client, server, blobStorage } = await SetupTest({ + dbname, + chClient, + }); + testContext.onTestFinished(() => server.close()); + + const fedGraphName = genID('fedGraph'); + await setupFederatedGraph(fedGraphName, client); + + const query1 = `query { hello }`; + const query2 = `query { __typename }`; + const op1Id = genID('op1'); + const op2Id = genID('op2'); + + const publishResp = await client.publishPersistedOperations({ + fedGraphName, + namespace: 'default', + clientName: 'test-client', + operations: [ + { id: op1Id, contents: query1 }, + { id: op2Id, contents: query2 }, + ], + }); + expect(publishResp.response?.code).toBe(EnumStatusCode.OK); + + // Verify manifest has 2 operations + let storageKeys = blobStorage.keys(); + let manifestKey = storageKeys.find((key) => key.endsWith('/operations/manifest.json')); + let blobObject = await blobStorage.getObject({ key: manifestKey! 
}); + let text = await new Response(blobObject.stream).text(); + let manifest = JSON.parse(text); + expect(Object.keys(manifest.operations).length).toBe(2); + const revisionBefore = manifest.revision; + + // Delete one operation + const deleteResp = await client.deletePersistedOperation({ + fedGraphName, + namespace: 'default', + operationId: publishResp.operations[0].id, + clientName: 'test-client', + }); + expect(deleteResp.response?.code).toBe(EnumStatusCode.OK); + + // Verify manifest now has 1 operation with a new revision + storageKeys = blobStorage.keys(); + manifestKey = storageKeys.find((key) => key.endsWith('/operations/manifest.json')); + blobObject = await blobStorage.getObject({ key: manifestKey! }); + text = await new Response(blobObject.stream).text(); + manifest = JSON.parse(text); + expect(Object.keys(manifest.operations).length).toBe(1); + expect(manifest.revision).not.toBe(revisionBefore); + }); + + test('Should produce a deterministic revision for the same set of operations', async (testContext) => { + const { client, server, blobStorage } = await SetupTest({ + dbname, + chClient, + }); + testContext.onTestFinished(() => server.close()); + + const fedGraphName = genID('fedGraph'); + await setupFederatedGraph(fedGraphName, client); + + const query = `query { hello }`; + + const publishResp = await client.publishPersistedOperations({ + fedGraphName, + namespace: 'default', + clientName: 'test-client', + operations: [{ id: genID('hello'), contents: query }], + }); + expect(publishResp.response?.code).toBe(EnumStatusCode.OK); + + const storageKeys = blobStorage.keys(); + const manifestKey = storageKeys.find((key) => key.endsWith('/operations/manifest.json')); + const blobObject1 = await blobStorage.getObject({ key: manifestKey! }); + const text1 = await new Response(blobObject1.stream).text(); + const manifest1 = JSON.parse(text1); + + // Publish the same operations again (will be UP_TO_DATE), which still triggers manifest regen + const publishResp2 = await client.publishPersistedOperations({ + fedGraphName, + namespace: 'default', + clientName: 'test-client', + operations: [{ id: publishResp.operations[0].id, contents: query }], + }); + expect(publishResp2.response?.code).toBe(EnumStatusCode.OK); + + const blobObject2 = await blobStorage.getObject({ key: manifestKey! }); + const text2 = await new Response(blobObject2.stream).text(); + const manifest2 = JSON.parse(text2); + + // Same operations should produce the same revision + expect(manifest2.revision).toBe(manifest1.revision); + }); + + test('Should reject publish when operation limit would be exceeded', async (testContext) => { + const { client, server, blobStorage, users } = await SetupTest({ + dbname, + chClient, + }); + testContext.onTestFinished(() => server.close()); + + const fedGraphName = genID('fedGraph'); + await setupFederatedGraph(fedGraphName, client); + + const user = users.adminAliceCompanyA; + const db = server.db; + const logger = server.log; + + // Resolve the federated graph ID. + const fedGraphRepo = new FederatedGraphRepository(logger, db, user.organizationId); + const fedGraph = await fedGraphRepo.byName(fedGraphName, 'default'); + expect(fedGraph).toBeDefined(); + + // Seed operations directly in the DB to fill up to the limit. 
+ const opsRepo = new OperationsRepository(db, fedGraph!.id); + const clientId = await opsRepo.registerClient('test-client', user.userId); + + const seedOps = Array.from({ length: MAX_MANIFEST_OPERATIONS }, (_, i) => ({ + operationId: `seed-op-${i}`, + hash: crypto.createHash('sha256').update(`seed-op-${i}`).digest('hex'), + filePath: `seed-op-${i}.graphql`, + contents: `query SeedOp${i} { hello }`, + operationNames: [`SeedOp${i}`], + })); + await opsRepo.updatePersistedOperations(clientId, user.userId, seedOps); + + // Publishing a new operation should be rejected because the limit is already reached. + const resp = await client.publishPersistedOperations({ + fedGraphName, + namespace: 'default', + clientName: 'test-client', + operations: [{ id: genID('trigger'), contents: `query ExceedsLimit { hello }` }], + }); + expect(resp.response?.code).toBe(EnumStatusCode.ERR); + expect(resp.response?.details).toContain('Operation limit exceeded'); + }); + }); + describe('check', () => { test('Should check the traffic of the operation', async (testContext) => { const { client, server } = await SetupTest({ diff --git a/controlplane/test/test-util.ts b/controlplane/test/test-util.ts index ea26e6d035..881ffc888f 100644 --- a/controlplane/test/test-util.ts +++ b/controlplane/test/test-util.ts @@ -116,7 +116,7 @@ export const SetupTest = async function ({ const realm = 'test'; const loginRealm = 'master'; - const apiUrl = 'http://localhost:8080'; + const apiUrl = process.env.KC_API_URL || 'http://localhost:8080'; const clientId = 'studio'; const adminUser = 'admin'; const adminPassword = 'changeme'; diff --git a/docs-website/cli/operations/push.mdx b/docs-website/cli/operations/push.mdx index ec229d9479..a2009ada48 100644 --- a/docs-website/cli/operations/push.mdx +++ b/docs-website/cli/operations/push.mdx @@ -25,7 +25,7 @@ The operations are validated against the graph schema before registering them. I ### Options - `-n, --namespace` : The namespace of the federated graph (Default: "default"). -- `-c, --client-name ` The client name / identifier to register these operations under. +- `-c, --client-name ` The client name / identifier to register these operations under. By default, the client name must be sent by the client at request time via the `graphql-client-name` HTTP header. When the router runs in [manifest mode](/router/persisted-queries/persisted-operations#pql-manifest), the client name is used only for organizational structure in the Studio (e.g. you could use `"default"`) and is not matched against incoming requests. - `-f, --file ` Filename to read operations from. This argument can be used multiple times to read operations from multiple files. See [Description](/cli/operations/push#description) for a list of the supported file formats. - `-q, --quiet` Don't produce any output, just exit with success or error. Intended for use in CI checks. - `--format ` Specify an output format for the returned data. Support types include `text` (the default) and `json.` \ No newline at end of file diff --git a/docs-website/router/configuration.mdx b/docs-website/router/configuration.mdx index b0df8644f7..ecc9b6b7bc 100644 --- a/docs-website/router/configuration.mdx +++ b/docs-website/router/configuration.mdx @@ -1167,14 +1167,17 @@ These rules apply to requests being made from the Router to all Subgraphs. 
| Environment Variable | YAML | Required | Description | Default Value | | ------------------------------------------ | ------------------------------------------ | --------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------- | | | persisted_operations | | The configuration for the persisted operations. | | -| PERSISTED_OPERATIONS_DISABLED | persisted_operations.disabled | | Disable persisted operations. | false | +| PERSISTED_OPERATIONS_DISABLED | persisted_operations.disabled | | Disable persisted operations. When set to true, the PQL manifest is also disabled. | false | | | persisted_operations.cache | | LRU cache for persisted operations. | | | PERSISTED_OPERATIONS_CACHE_SIZE | persisted_operations.cache.size | | The size of the cache in SI unit. | "100MB" | | | persisted_operations.storage | | The storage provider for persisted operation. Only one provider can be active. When no provider is specified, the router will fallback to the Cosmo CDN provider to download the persisted operations. | | | PERSISTED_OPERATIONS_STORAGE_PROVIDER_ID | persisted_operations.storage.provider_id | | The ID of the storage provider. The ID must match the ID of the storage provider in the `storage_providers` section. | | -| PERSISTED_OPERATIONS_STORAGE_OBJECT_PREFIX | persisted_operations.storage.object_prefix | | The prefix of the object in the storage provider location. The prefix is put in front of the operation SHA256 hash. \$prefix/SHA256.json | | +| PERSISTED_OPERATIONS_STORAGE_OBJECT_PREFIX | persisted_operations.storage.object_prefix | | The prefix of the object in the storage provider location. The prefix is put in front of the operation SHA256 hash. \$prefix/SHA256.json | | | PERSISTED_OPERATIONS_LOG_UNKNOWN | persisted_operations.log_unknown | | Log operations (sent with the operation body) which haven't yet been persisted. If the value is true, all operations not yet persisted are logged to the router logs. | false | | PERSISTED_OPERATIONS_SAFELIST_ENABLED | persisted_operations.safelist.enabled | | Only allows persisted operations (sent with operation body). If the value is true, all operations not explicitly added to the safelist are blocked. | false | +| PERSISTED_OPERATIONS_MANIFEST_ENABLED | persisted_operations.manifest.enabled | | Enable the PQL manifest. When enabled, the router loads all persisted operations from a single `manifest.json` file and serves them from memory. Uses the `storage` config above when a provider is set, otherwise fetches from the Cosmo CDN. Only S3 and CDN storage providers are supported. | false | +| PERSISTED_OPERATIONS_MANIFEST_POLL_INTERVAL| persisted_operations.manifest.poll_interval| | The interval at which the router polls the Cosmo CDN for manifest updates (only when no storage provider is configured). Minimum 10s. | 10s | +| PERSISTED_OPERATIONS_MANIFEST_POLL_JITTER | persisted_operations.manifest.poll_jitter | | Random jitter added to each poll interval to avoid thundering herd. Minimum 1s. 
| 5s | ## Automatic Persisted Queries @@ -1218,27 +1221,54 @@ The configuration for the execution setup contains instructions for the router t ### Example YAML config: -```yaml config.yaml -version: "1" +You can load the execution config from a [storage provider](/router/storage-providers) or from a local file: -execution_config: - storage: - provider_id: s3 - object_path: /prod -``` + + ```yaml Storage provider + version: "1" + execution_config: + storage: + provider_id: s3 + object_path: "router.json" + ``` -or + ```yaml Local file + version: "1" + execution_config: + file: + path: "./__schemas/config.json" + watch: true + watch_interval: "1s" + ``` + + ```yaml Fallback storage + version: "1" + execution_config: + storage: + provider_id: s3 + object_path: "router.json" + fallback_storage: + enabled: true + provider_id: minio + object_path: "router.json" + ``` + -```yaml config.yaml -version: "1" +When using a storage provider, the `object_path` field points to the file in your bucket that is updated after each schema deployment in your CI/CD pipeline: -execution_config: - file: - path: "./__schemas/config.json" - watch: true - watch_interval: "1s" +```bash +# Publish your subgraph +wgc subgraph publish my-subgraph --schema ./schema.graphqls +# Download the latest execution config after successful composition +wgc router fetch mygraph -o router.json +# Upload the execution config to your S3 storage +aws s3 cp router.json s3://cosmo/ ``` +The router will check for updates every 10 seconds (default) and hot-reload accordingly without impacting current user traffic. + +You can configure a fallback storage for fetching the execution config in the event the router cannot reach the primary storage. You cannot use the same provider for both primary and fallback storage. + ### Execution config options | Environment Variable | YAML | Required | Description | Default Value | diff --git a/docs-website/router/persisted-queries/persisted-operations.mdx b/docs-website/router/persisted-queries/persisted-operations.mdx index 434e0641eb..7391394b98 100644 --- a/docs-website/router/persisted-queries/persisted-operations.mdx +++ b/docs-website/router/persisted-queries/persisted-operations.mdx @@ -25,29 +25,76 @@ The control plane replicates these operations in the Cosmo CDN, where the router Persisted operations require some tooling on the client side. Consult the documentation for your GraphQL client library to find out how to generate a query manifest or query map. -Once this list of operations has been generated, typically in your CI or CD pipeline, you can use [`wgc`](/cli/intro) to register your operations: +### Supported manifest formats + +`wgc operations push` automatically detects the format of your manifest file. The following formats are supported: + + + + The Apollo persisted query manifest format: + + ```json manifest.json + { + "format": "apollo-persisted-query-manifest", + "version": 1, + "operations": [ + { + "id": "dc67510fb4289672bea757e862d6b00e83db5d3c", + "name": "GetEmployees", + "type": "query", + "body": "query GetEmployees { employees { id } }" + } + ] + } + ``` + + + Relay query maps are supported in two formats — as an array of `[id, query]` pairs or as a `{id: query}` object: + + ```json relay-query-map.json + { + "dc67510fb4289672bea757e862d6b00e83db5d3c": "query GetEmployees { employees { id } }", + "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2": "mutation UpdateEmployee($id: ID!) 
{ updateEmployee(id: $id) { id } }" + } + ``` + + ```bash + wgc operations push mygraph -n default -c web -f relay-query-map.json + ``` + + + Plain `.graphql` or `.gql` files containing a single operation. The SHA-256 hash is computed automatically: + + ```graphql operations.graphql + query GetEmployees { + employees { + id + } + } + ``` + + ```bash + wgc operations push mygraph -n default -c web -f operations.graphql + ``` + + + +### Pushing operations + +Once your manifest is generated, push it using `wgc`: ```bash wgc operations push mygraph -n default -c web -f my-operations-manifest.json ``` -This will register the operations for your federated graph named `mygraph` in the `default` namespace (as seen in the Studio) and your client named `web` (indicated by the `graphql-client-name` HTTP header), using the same operation identifiers as your library when possible. If your library doesn't generate these identifiers, Cosmo will automatically generate them. +This registers the operations for your federated graph named `mygraph` in the `default` namespace and your client named `web` (indicated by the `graphql-client-name` HTTP header). You can push multiple files at once using the `-f` flag multiple times. -When pushing the operations, you will see a short summary of the operations that were pushed, indicating how many were created and how many were already registered. Alternatively, the `--output` flag can be used to obtain a JSON summary that can easily be processed by your tooling. +When pushing the operations, you will see a short summary indicating how many were created and how many were already registered. Use `--format json` for machine-readable output: ```bash -wgc operations push mygraph -n default -c my-client -f persisted-query-manifest.json --format json (11-25 10:23) -{ - "2d9df67f96ce804da7a9107d33373132a53bf56aec29ef4b4e06569a43a16935": { - "body": "query Employees {\n employees {\n id\n role {\n department\n title\n __typename\n }\n details {\n forename\n surname\n location\n __typename\n }\n __typename\n }\n}", - "status": "up_to_date" - }, -... -} +wgc operations push mygraph -n default -c my-client -f manifest.json --format json ``` -Finally, you should enable persisted operations in your GraphQL client library. - To see all available options for `wgc operations push`, see [Push](/cli/operations/push). Additionally, check the [Using Persisted Operation with Federated GraphQL](/tutorial/using-persisted-operations) tutorial for a step-by-step guide. @@ -69,6 +116,50 @@ Operations are currently deleted one at a time from the UI. Studio always asks for confirmation before deleting operations. If traffic is detected for the selected operation, the dialog warns that the operation is receiving traffic. If analytics data is unavailable, Studio cannot guarantee that existing clients won't break. You can always check the metrics using the link in the dialog. +## PQL Manifest + +By default, the router fetches persisted operations individually from the Cosmo CDN on each request. When the **PQL manifest** is enabled, the router instead loads all persisted operations from a single `manifest.json` file at startup and serves them entirely from memory — eliminating per-request network overhead. + +```yaml +persisted_operations: + manifest: + enabled: true +``` + +The manifest is automatically updated in the Cosmo CDN whenever operations are added or deleted via `wgc operations push` or Studio. 
The router polls for updates using `poll_interval` and `poll_jitter`, picking up changes without requiring a restart. You can also load the manifest from a custom [storage provider](#using-a-custom-storage-provider). + + + When the manifest is enabled, it is **authoritative** — the router does not fall back to fetching individual operations from the CDN. Unknown operation hashes are rejected immediately. + + +### Cache warmup + +When the PQL manifest is enabled, the router automatically warms up its caches by pre-processing all operations from the manifest at startup and after each manifest update. Each operation goes through parsing, normalization, validation, and query planning — the same steps that happen on a regular request. Once warmed, every operation is served entirely from cache with zero processing overhead on the first request. + +Cache warmup is enabled by default and can be configured or disabled: + +```yaml +persisted_operations: + manifest: + enabled: true + warmup: + enabled: true # default: true + workers: 4 # number of concurrent workers (default: 4) + items_per_second: 50 # rate limit, 0 = unlimited (default: 50) + timeout: 30s # maximum time for warmup to complete (default: 30s) +``` + +| Option | Default | Description | +|--------|---------|-------------| +| `enabled` | `true` | Set to `false` to disable cache warmup for manifest operations. | +| `workers` | `4` | Number of concurrent workers used to pre-process operations. Increase for large manifests. | +| `items_per_second` | `50` | Rate limit for items processed per second. Set to `0` for unlimited throughput. | +| `timeout` | `30s` | Maximum time allowed for the warmup to complete. If the timeout is exceeded, the router logs an error but continues serving requests normally. | + +## Using a custom storage provider + +You can load persisted operations from your own S3-compatible storage instead of the Cosmo CDN. First, [define a storage provider](/router/storage-providers), then reference it in your persisted operations configuration. + ## Disallowing non-persisted Operations If you're going all in on Security, you'd want to only allow Persisted Operations in your Production Environment. diff --git a/docs-website/router/security/hardening-guide.mdx b/docs-website/router/security/hardening-guide.mdx index eca7b0ab72..3699bdd449 100644 --- a/docs-website/router/security/hardening-guide.mdx +++ b/docs-website/router/security/hardening-guide.mdx @@ -127,10 +127,13 @@ By default the subgraph routing URL from the [wgc subgraph create](/cli/subgraph [Persistent operations](/router/persisted-queries/persisted-operations) are a great way to save bandwidth but also to reduce the attack vectors by only allowing known queries to be executed. -By default, both persistent queries and regular queries are allowed. To allow only persistent queries, the following configuration should be applied: +We recommend enabling the [PQL manifest](/router/persisted-queries/persisted-operations#pql-manifest) to load all persisted operations at startup and serve them from memory. This eliminates per-request network calls and ensures the router has a complete, authoritative view of all allowed operations. 
```yaml router.yaml + persisted_operations: + manifest: + enabled: true security: block_non_persisted_operations: enabled: true diff --git a/docs-website/router/storage-providers.mdx b/docs-website/router/storage-providers.mdx index 43fc7421ee..10f21362cc 100644 --- a/docs-website/router/storage-providers.mdx +++ b/docs-website/router/storage-providers.mdx @@ -5,9 +5,7 @@ description: "To maintain control over your data and ensure high performance, ut --- -The router is a stateless component, allowing it to be scaled horizontally without requiring persistence. However, it needs to fetch the execution configuration from a storage source at startup and when new schema updates are published. This execution config provides the instructions for executing and planning GraphQL operations. - -The same approach applies to persisted operations (PO), which manage an allowlist of permitted GraphQL operations that can execute against the router. The router must download these operations once before they can be executed. +The router is a stateless component, allowing it to be scaled horizontally without requiring persistence. However, it needs to fetch artifacts such as the execution configuration and persisted operations from a storage source. By default, the router fetches these from the Cosmo CDN. You can configure custom storage providers to use your own infrastructure instead. For both mechanisms, different storage providers can be used: @@ -15,18 +13,14 @@ For both mechanisms, different storage providers can be used: * **Amazon S3**: An object storage protocol. We support any S3 compatible object-storage e.g. Minio and AWS. - - By removing the dependency on Cosmo Cloud, your router no longer relies on us for operations while still benefiting from all its features. - - -When using a custom storage provider, you are responsible for manually pushing those artifacts as part of your CI process. In the next section, we will explain how to do this: +When using a custom storage provider, you are responsible for manually pushing those artifacts as part of your CI process. ## Define a provider -Before you can use a storage provider, you have to define it in the [`storage_providers`](/router/storage-providers) section of your `config.yaml` file. +Before you can use a storage provider, you have to define it in the `storage_providers` section of your `config.yaml` file. Each provider is given an `id` that you reference from other configuration sections. - ```bash config.yaml + ```yaml config.yaml version: 1 storage_providers: s3: @@ -47,7 +41,7 @@ Before you can use a storage provider, you have to define it in the [`storage_pr If you are using EC2 or EKS on AWS and have configured [`node IAM roles`](https://docs.aws.amazon.com/eks/latest/userguide/create-node-role.html) you don't need to provide an `access_key` or `secret_key` and the S3 client will handle this on your behalf. - ```bash config.yaml +```yaml config.yaml version: 1 storage_providers: s3: @@ -59,103 +53,13 @@ storage_providers: ``` -## Execution config - -After configuring the storage provider, you can use it by referencing it in the `provider_id` field. - - - ```bash config.yaml - version: 1 - execution_config: - storage: - provider_id: s3 - object_path: "router.json" - ``` - - -The `object_path` field points to the file in your bucket that is updated after each schema deployment in your CI/CD pipeline. 
The following snippet illustrates how it could look like: - -```bash -# Publish your subgraph -wgc subgraph publish my-subgraph --schema ./schema.graphqls -# Download the latest execution config after successful composition -wgc router fetch mygraph -o router.json -# Upload the execution config to your S3 storage -aws s3 cp router.json s3://cosmo/ -``` +## Using storage providers -First, we publish our schema changes to Cosmo. After that, we will download the latest execution config and upload it to your own S3 provider. The router will check for updates every 10 seconds (default) and hot-reload the router accordingly without impacting current user traffic. +Once a provider is defined, you reference it by `provider_id` in the configuration of each feature. The following features support custom storage providers: -### Fallback storage +* [**Execution config**](/router/configuration#execution-config-options) — load the router execution configuration from S3 instead of the Cosmo CDN. +* [**Persisted operations**](/router/persisted-queries/persisted-operations#using-a-custom-storage-provider) — load individual persisted operations or the PQL manifest from S3. -You can configure a fallback storage for fetching the execution config in the event the router cannot reach the primary storage. If enabled, it will default to using the Cosmo CDN but you can specify the provider as well. You cannot use the same provider for both primary and fallback storage. - - - ```yaml config.yaml - version: 1 - execution_config: - storage: - provider_id: s3 - object_path: "router.json" - fallback_storage: - enabled: true - provider_id: minio - object_path: "router.json" - ``` - - -### Best Practices +## Best Practices * Create different S3 credentials for READ and WRITE to reduce the attack surface. - -## Persisted Operations - -After configuring the storage provider, you can use it by referencing it in the `provider_id` field. - - - ```yaml config.yaml - version: 1 - persisted_operations: - cache: - size: 100MB - storage: - provider_id: s3 - object_prefix: "prod/operations" - ``` - - -The `object_prefix` field points to the location in the bucket where the persisted operations are uploaded. Each persisted operation needs to follow the naming convention: **SHA256** of the file + `.json` as filename extension. A persisted operation is a JSON document and must follow the following structure: - -* `version`: The version of the persisted operation format. - -* `body`: The actual content of the persistent operation. - -### Example - -Given the following persisted operation: - - - ```json c3ab8ff13720e8ad9047dd39466b3c8974e592c2fa383d4a3960714caef0c4f2.json - {"version":1,"body":"{\n employees {\n id\n details {\n forename\n }\n }\n}"} - ``` - - -Upload the file to the bucket location as follows: - -```bash -# Upload the persisted operation to S3 -aws s3 cp c3ab8ff13720e8ad9047dd39466b3c8974e592c2fa383d4a3960714caef0c4f2.json s3://cosmo/prod/operations/ -``` - -Now, you can make a persisted operation (PO) request against the router, and it will fetch the operation from your S3 and execute it. Subsequent requests are cached and won't add additional latency. 
- -```bash -curl 'http://localhost:3002/graphql' \ - -H 'graphql-client-name: test' \ - -H 'Content-Type: application/json' \ - -d '{"extensions":{"persistedQuery":{"version":1,"sha256Hash":"c3ab8ff13720e8ad9047dd39466b3c8974e592c2fa383d4a3960714caef0c4f2"}}}' -``` - -### Best Practices - -* We can cache persisted operations effectively only by using an immutable filename, such as the SHA-256 hash of the file. diff --git a/docs-website/tutorial/using-persisted-operations.mdx b/docs-website/tutorial/using-persisted-operations.mdx index 258c1e85ad..ba5f910ecc 100644 --- a/docs-website/tutorial/using-persisted-operations.mdx +++ b/docs-website/tutorial/using-persisted-operations.mdx @@ -84,7 +84,7 @@ There are a few things to note from this command: * The first argument is the federated graph name to push the operations to. This is the federated graph we created while following , named `federation`. -* After the federated subgraph name, we must also indicate a client name. Persisted operations in Cosmo are always associated with a given client. If needed, Cosmo will automatically register the given client name the first time it sees it. During operation execution, the client name is obtained from the `graphql-client-name` HTTP header. +* After the federated graph name, we must also indicate a client name. Persisted operations in Cosmo are always associated with a given client. If needed, Cosmo will automatically register the given client name the first time it sees it. During operation execution, the client name is obtained from the `graphql-client-name` HTTP header. When the router runs in [manifest mode](/router/persisted-queries/persisted-operations#pql-manifest), the client name is used only to structure operations in Studio. * Finally, we specify one or more files that contain GraphQL operations. Here we're using a plain `.graphql` file, but other formats are also supported, including: @@ -116,8 +116,26 @@ curl 'http://127.0.0.1:3002/graphql' \ This will return the same data as executing the operation by returning its contents. +## Using the PQL Manifest + +Instead of having the router fetch each persisted operation individually from the CDN on each request, you can enable the **PQL manifest**. In this mode, the router loads all persisted operations from a single `manifest.json` file at startup and serves them. + +Add the following to your router configuration: + +```yaml +persisted_operations: + manifest: + enabled: true +``` + +The manifest is automatically kept in sync with the Cosmo CDN whenever you push or delete operations. The router polls for updates and hot-reloads without a restart. + +You can also load the manifest from a custom [storage provider](/router/storage-providers) (e.g. S3-compatible storage) instead of the Cosmo CDN. For details, see [PQL Manifest](/router/persisted-queries/persisted-operations#pql-manifest). + ## Further information -* Check the router documentation for [Persisted Operations](/router/persisted-queries/persisted-operations). +* [Persisted Operations](/router/persisted-queries/persisted-operations) — full reference including manifest formats, manifest mode, custom storage providers, and security options. + +* [Push command](/cli/operations/push) — CLI reference for `wgc operations push`. -* Check the documentation for [Push](/cli/operations/push) command. +* [Storage Providers](/router/storage-providers) — configure S3 or other storage backends for router artifacts. 
diff --git a/router-tests/operations/pql_manifest_test.go b/router-tests/operations/pql_manifest_test.go new file mode 100644 index 0000000000..cdba64f0c3 --- /dev/null +++ b/router-tests/operations/pql_manifest_test.go @@ -0,0 +1,761 @@ +package integration + +import ( + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/require" + "github.com/wundergraph/cosmo/router-tests/testenv" + "github.com/wundergraph/cosmo/router/core" + "github.com/wundergraph/cosmo/router/pkg/config" + "go.uber.org/zap/zapcore" +) + +// getCDNRequests returns all recorded HTTP requests from the CDN test server. +// The CDN test server records every request path it receives. Calling GET on +// its base URL returns these as a JSON array of strings (e.g. "GET /org/graph/operations/..."). +func getCDNRequests(t *testing.T, cdnURL string) []string { + t.Helper() + resp, err := http.Get(cdnURL) + require.NoError(t, err) + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + var requests []string + err = json.Unmarshal(body, &requests) + require.NoError(t, err) + return requests +} + +func TestPQLManifest(t *testing.T) { + t.Parallel() + + expectedEmployeesBody := `{"data":{"employees":[{"id":1},{"id":2},{"id":3},{"id":4},{"id":5},{"id":7},{"id":8},{"id":10},{"id":11},{"id":12}]}}` + persistedNotFoundResp := `{"errors":[{"message":"PersistedQueryNotFound","extensions":{"code":"PERSISTED_QUERY_NOT_FOUND"}}]}` + + manifestConfig := config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + } + + manifestConfigWithWarmup := config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + Warmup: config.PQLManifestWarmupConfig{ + Enabled: true, + Workers: 4, + Timeout: 30 * time.Second, + }, + }, + } + + t.Run("lookup succeeds for known operations", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(manifestConfig), + }, + LogObservation: testenv.LogObservationConfig{ + Enabled: true, + LogLevel: zapcore.InfoLevel, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + + // Verify startup log + logEntries := xEnv.Observer().FilterMessageSnippet("Loaded initial PQL manifest").All() + require.Len(t, logEntries, 1) + }) + }) + + t.Run("rejects unknown operation hash", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(manifestConfig), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "0000000000000000000000000000000000000000000000000000000000000000"}}`), + }) + require.Equal(t, persistedNotFoundResp, res.Body) + }) + }) + + t.Run("no CDN requests for individual operations", func(t 
*testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(manifestConfig), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + // Make multiple requests + for i := 0; i < 3; i++ { + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + } + + // With manifest enabled, the router should never call CDN for individual operations + for _, req := range getCDNRequests(t, xEnv.CDN.URL) { + require.False(t, strings.Contains(req, "/operations/my-client/"), + "expected no individual operation CDN requests, but got: %s", req) + } + }) + }) + + t.Run("defaults to Cosmo CDN when no storage provider configured", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(manifestConfig), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + + hasManifestRequest := false + for _, req := range getCDNRequests(t, xEnv.CDN.URL) { + if strings.Contains(req, "/operations/manifest.json") { + hasManifestRequest = true + } + require.False(t, strings.Contains(req, "/operations/my-client/"), + "expected no individual operation CDN requests, but got: %s", req) + } + require.True(t, hasManifestRequest, "CDN should be called for manifest when no storage provider is configured") + }) + }) + + t.Run("safelist with manifest allows known queries", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + Safelist: config.SafelistConfiguration{Enabled: true}, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Header: header, + Query: "query Employees {\n employees {\n id\n }\n}", + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + }) + }) + + t.Run("safelist with manifest rejects unknown queries", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + Safelist: config.SafelistConfiguration{Enabled: true}, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + res, err := 
xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Header: header, + Query: "query Employees {\n\n\n employees {\n id\n }\n}", + }) + require.NoError(t, err) + require.Equal(t, persistedNotFoundResp, res.Body) + }) + }) + + t.Run("log_unknown with manifest logs and allows unknown queries", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + LogUnknown: true, + }), + }, + LogObservation: testenv.LogObservationConfig{ + Enabled: true, + LogLevel: zapcore.InfoLevel, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + nonPersistedQuery := "query Employees {\n\n\n employees {\n id\n }\n}" + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Header: header, + Query: nonPersistedQuery, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + + logEntries := xEnv.Observer().FilterMessageSnippet("Unknown persisted operation found").All() + require.Len(t, logEntries, 1) + requestContext := logEntries[0].ContextMap() + require.Equal(t, nonPersistedQuery, requestContext["query"]) + }) + }) + + t.Run("log_unknown with manifest returns not found for hash-only request", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + LogUnknown: true, + }), + }, + LogObservation: testenv.LogObservationConfig{ + Enabled: true, + LogLevel: zapcore.WarnLevel, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + // Hash-only request with no query body — should return PersistedQueryNotFound, not "empty request body" + res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "0000000000000000000000000000000000000000000000000000000000000000"}}`), + Header: header, + }) + require.Equal(t, persistedNotFoundResp, res.Body) + + logEntries := xEnv.Observer().FilterMessageSnippet("Unknown persisted operation found").All() + require.Len(t, logEntries, 1) + }) + }) + + t.Run("without manifest CDN is used for individual operations", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{}, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + + hasOperationRequest := false + hasManifestRequest := false + for _, req := range getCDNRequests(t, xEnv.CDN.URL) { + if strings.Contains(req, "/operations/my-client/") { + hasOperationRequest = true + } + if strings.Contains(req, "/operations/manifest.json") { + hasManifestRequest = true + } + } + require.True(t, 
hasOperationRequest, "CDN should be called for individual operations when manifest is disabled") + require.False(t, hasManifestRequest, "CDN should not fetch manifest when manifest is disabled") + }) + }) + + t.Run("without manifest safelist still uses CDN", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Safelist: config.SafelistConfiguration{Enabled: true}, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + // Known persisted query should succeed via CDN lookup + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Header: header, + Query: "query Employees {\n employees {\n id\n }\n}", + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + + // Unknown query should be rejected + res, err = xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Header: header, + Query: "query Employees {\n\n\n employees {\n id\n }\n}", + }) + require.NoError(t, err) + require.Equal(t, persistedNotFoundResp, res.Body) + + hasOperationRequest := false + hasManifestRequest := false + for _, req := range getCDNRequests(t, xEnv.CDN.URL) { + if strings.Contains(req, "/operations/my-client/") { + hasOperationRequest = true + } + if strings.Contains(req, "/operations/manifest.json") { + hasManifestRequest = true + } + } + require.True(t, hasOperationRequest, "CDN should be called for individual operations when manifest is disabled") + require.False(t, hasManifestRequest, "CDN should not fetch manifest when manifest is disabled") + }) + }) + + t.Run("without manifest log_unknown still uses CDN", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + LogUnknown: true, + }), + }, + LogObservation: testenv.LogObservationConfig{ + Enabled: true, + LogLevel: zapcore.InfoLevel, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + nonPersistedQuery := "query Employees {\n\n\n employees {\n id\n }\n}" + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + // Unknown query should be logged but allowed + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Header: header, + Query: nonPersistedQuery, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + + logEntries := xEnv.Observer().FilterMessageSnippet("Unknown persisted operation found").All() + require.Len(t, logEntries, 1) + + hasManifestRequest := false + for _, req := range getCDNRequests(t, xEnv.CDN.URL) { + if strings.Contains(req, "/operations/manifest.json") { + hasManifestRequest = true + } + } + require.False(t, hasManifestRequest, "CDN should not fetch manifest when manifest is disabled") + }) + }) + + t.Run("log_unknown with safelist and manifest logs and rejects unknown queries", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + LogUnknown: true, + Safelist: config.SafelistConfiguration{Enabled: true}, + }), + }, + LogObservation: 
testenv.LogObservationConfig{ + Enabled: true, + LogLevel: zapcore.InfoLevel, + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + nonPersistedQuery := "query Employees {\n\n\n employees {\n id\n }\n}" + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Header: header, + Query: nonPersistedQuery, + }) + require.NoError(t, err) + require.Equal(t, persistedNotFoundResp, res.Body) + + logEntries := xEnv.Observer().FilterMessageSnippet("Unknown persisted operation found").All() + require.Len(t, logEntries, 1) + requestContext := logEntries[0].ContextMap() + require.Equal(t, nonPersistedQuery, requestContext["query"]) + }) + }) + + t.Run("manifest reload preserves cache hits", func(t *testing.T) { + t.Parallel() + + employeesHash := "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f" + employeesQuery := "query Employees {\n employees {\n id\n }\n}" + + manifestV1, _ := json.Marshal(map[string]interface{}{ + "version": 1, + "revision": "rev-v1", + "generatedAt": "2024-01-01T00:00:00Z", + "operations": map[string]string{ + employeesHash: employeesQuery, + }, + }) + // manifestV2 has the same operation but a new revision + manifestV2, _ := json.Marshal(map[string]interface{}{ + "version": 1, + "revision": "rev-v2", + "generatedAt": "2024-01-02T00:00:00Z", + "operations": map[string]string{ + employeesHash: employeesQuery, + }, + }) + + var currentManifest atomic.Value + currentManifest.Store(manifestV1) + + var manifestFetchCount atomic.Int32 + + cdnServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if strings.HasSuffix(r.URL.Path, "/operations/manifest.json") { + manifest := currentManifest.Load().([]byte) + + var m struct { + Revision string `json:"revision"` + } + _ = json.Unmarshal(manifest, &m) + + ifNoneMatch := r.Header.Get("If-None-Match") + if ifNoneMatch == `"`+m.Revision+`"` { + w.Header().Set("ETag", ifNoneMatch) + w.WriteHeader(http.StatusNotModified) + return + } + + manifestFetchCount.Add(1) + w.Header().Set("Content-Type", "application/json") + w.Header().Set("ETag", `"`+m.Revision+`"`) + w.WriteHeader(http.StatusOK) + _, _ = w.Write(manifest) + return + } + + w.WriteHeader(http.StatusNotFound) + })) + defer cdnServer.Close() + + testenv.Run(t, &testenv.Config{ + CdnSever: cdnServer, + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 100 * time.Millisecond, + PollJitter: 5 * time.Millisecond, + Warmup: config.PQLManifestWarmupConfig{ + Enabled: true, + Workers: 4, + Timeout: 30 * time.Second, + }, + }, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + // 1. First request is a cache HIT from warmup + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "` + employeesHash + `"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + require.Equal(t, "HIT", res.Response.Header.Get(core.PersistedOperationCacheHeader)) + + // 2. Swap to manifest v2 (new revision, same operations) + currentManifest.Store(manifestV2) + + // 3. 
Wait for the poller to pick up the new manifest + require.Eventually(t, func() bool { + return manifestFetchCount.Load() >= 2 + }, 5*time.Second, 50*time.Millisecond) + + // 4. After manifest reload, the operation should still be a cache HIT + // because the SHA is the same — no revision in the cache key. + res, err = xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "` + employeesHash + `"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + require.Equal(t, "HIT", res.Response.Header.Get(core.PersistedOperationCacheHeader)) + }) + }) + + t.Run("manifest warmup serves first request from cache", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(manifestConfigWithWarmup), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + // The very first request should hit ALL caches because the manifest warmup + // pre-processed all operations through the full pipeline at startup. + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + require.Equal(t, "HIT", res.Response.Header.Get(core.PersistedOperationCacheHeader)) + require.Equal(t, "HIT", res.Response.Header.Get(core.NormalizationCacheHeader)) + require.Equal(t, "HIT", res.Response.Header.Get(core.VariablesNormalizationCacheHeader)) + require.Equal(t, "HIT", res.Response.Header.Get(core.VariablesRemappingCacheHeader)) + require.Equal(t, "HIT", res.Response.Header.Get(core.ExecutionPlanCacheHeader)) + }) + }) + + t.Run("manifest warmup cache hit is independent of client name", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(manifestConfigWithWarmup), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // Warmup runs without a client name. Requests from any client should still hit + // all caches because PQL manifest cache keys exclude clientName. 
+ for _, clientName := range []string{"client-a", "client-b", "another-client"} { + header := make(http.Header) + header.Add("graphql-client-name", clientName) + + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + require.Equal(t, "HIT", res.Response.Header.Get(core.PersistedOperationCacheHeader), + "expected persisted operation cache HIT for client %q", clientName) + require.Equal(t, "HIT", res.Response.Header.Get(core.NormalizationCacheHeader), + "expected normalization cache HIT for client %q", clientName) + require.Equal(t, "HIT", res.Response.Header.Get(core.VariablesNormalizationCacheHeader), + "expected variables normalization cache HIT for client %q", clientName) + require.Equal(t, "HIT", res.Response.Header.Get(core.VariablesRemappingCacheHeader), + "expected variables remapping cache HIT for client %q", clientName) + require.Equal(t, "HIT", res.Response.Header.Get(core.ExecutionPlanCacheHeader), + "expected execution plan cache HIT for client %q", clientName) + } + }) + }) + + t.Run("disabled persisted operations suppresses manifest", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Disabled: true, + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + // With persisted operations disabled, manifest should not load. + // A regular query should still work. 
+ res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + Query: "query { employees { id } }", + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + + // No manifest requests should be made to CDN + hasManifestRequest := false + for _, req := range getCDNRequests(t, xEnv.CDN.URL) { + if strings.Contains(req, "/operations/manifest.json") { + hasManifestRequest = true + } + } + require.False(t, hasManifestRequest, "CDN should not fetch manifest when persisted operations are disabled") + }) + }) + + t.Run("filesystem provider rejected for manifest", func(t *testing.T) { + t.Parallel() + testenv.FailsOnStartup(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + Storage: config.PersistedOperationsStorageConfig{ + ProviderID: "local", + }, + }), + core.WithStorageProviders(config.StorageProviders{ + FileSystem: []config.FileSystemStorageProvider{ + {ID: "local", Path: "."}, + }, + }), + }, + }, func(t *testing.T, err error) { + require.ErrorContains(t, err, "filesystem storage provider") + require.ErrorContains(t, err, "not supported for PQL manifest") + }) + }) + + t.Run("warmup disabled skips cache pre-processing", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + Warmup: config.PQLManifestWarmupConfig{ + Enabled: false, + }, + }, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + // With warmup disabled, the first request should still resolve the persisted operation + // from the manifest, but all processing caches should be cold. + res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + require.Equal(t, "MISS", res.Response.Header.Get(core.PersistedOperationCacheHeader)) + require.Equal(t, "MISS", res.Response.Header.Get(core.NormalizationCacheHeader)) + require.Equal(t, "MISS", res.Response.Header.Get(core.ExecutionPlanCacheHeader)) + }) + }) + + t.Run("warmup with custom workers and timeout", func(t *testing.T) { + t.Parallel() + testenv.Run(t, &testenv.Config{ + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + Warmup: config.PQLManifestWarmupConfig{ + Enabled: true, + Workers: 2, + ItemsPerSecond: 100, + Timeout: 10 * time.Second, + }, + }, + }), + }, + }, func(t *testing.T, xEnv *testenv.Environment) { + header := make(http.Header) + header.Add("graphql-client-name", "my-client") + + // With custom warmup config, all caches should still be warm on the first request. 
+ res, err := xEnv.MakeGraphQLRequest(testenv.GraphQLRequest{ + OperationName: []byte(`"Employees"`), + Extensions: []byte(`{"persistedQuery": {"version": 1, "sha256Hash": "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f"}}`), + Header: header, + }) + require.NoError(t, err) + require.Equal(t, expectedEmployeesBody, res.Body) + require.Equal(t, "HIT", res.Response.Header.Get(core.PersistedOperationCacheHeader)) + require.Equal(t, "HIT", res.Response.Header.Get(core.NormalizationCacheHeader)) + require.Equal(t, "HIT", res.Response.Header.Get(core.ExecutionPlanCacheHeader)) + }) + }) + + t.Run("fails to start when initial CDN manifest fetch fails", func(t *testing.T) { + t.Parallel() + + cdnServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if strings.HasSuffix(r.URL.Path, "/operations/manifest.json") { + // Return 404 (not 500) to avoid retryablehttp's 5 retries with exponential backoff. + w.WriteHeader(http.StatusNotFound) + return + } + w.WriteHeader(http.StatusNotFound) + })) + defer cdnServer.Close() + + testenv.FailsOnStartup(t, &testenv.Config{ + CdnSever: cdnServer, + RouterOptions: []core.Option{ + core.WithPersistedOperationsConfig(config.PersistedOperationsConfig{ + Manifest: config.PQLManifestConfig{ + Enabled: true, + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + }, + }), + }, + }, func(t *testing.T, err error) { + require.ErrorContains(t, err, "PQL manifest not found on CDN") + }) + }) +} diff --git a/router-tests/testenv/testdata/cdn/organization/graph/operations/manifest.json b/router-tests/testenv/testdata/cdn/organization/graph/operations/manifest.json new file mode 100644 index 0000000000..711eb32d4f --- /dev/null +++ b/router-tests/testenv/testdata/cdn/organization/graph/operations/manifest.json @@ -0,0 +1,10 @@ +{ + "version": 1, + "revision": "test-revision-001", + "generatedAt": "2024-01-01T00:00:00Z", + "operations": { + "dc67510fb4289672bea757e862d6b00e83db5d3cbbcfb15260601b6f29bb2b8f": "query Employees {\n employees {\n id\n }\n}", + "33651da3d80e420709520fb900c7ab8ec4151555da56062feeee428cf7f3a5dd": "query Employees {\n employees {\n id\n }\n}", + "9015ddfadd802bb378a14e48cea51e9bf9a07c7f8a71d85c56d7b104fea84937": "query Employees {\n employees {\n id\n }\n}" + } +} diff --git a/router/core/cache_warmup.go b/router/core/cache_warmup.go index 67af83b158..9204d4091f 100644 --- a/router/core/cache_warmup.go +++ b/router/core/cache_warmup.go @@ -62,7 +62,7 @@ func WarmupCaches(ctx context.Context, cfg *CacheWarmupConfig) (err error) { if cfg.Timeout <= 0 { w.timeout = time.Second * 30 } - w.log.Info("Warmup started", + w.log.Debug("Warmup started", zap.Int("workers", cfg.Workers), zap.Int("items_per_second", cfg.ItemsPerSecond), zap.Duration("timeout", cfg.Timeout), @@ -84,7 +84,7 @@ func WarmupCaches(ctx context.Context, cfg *CacheWarmupConfig) (err error) { ) return err } - w.log.Info("Warmup completed", + w.log.Debug("Warmup completed", zap.Int("processed_items", completed), zap.Duration("duration", time.Since(start)), ) @@ -123,7 +123,7 @@ func (w *cacheWarmup) run(ctx context.Context) (int, error) { return 0, nil } - w.log.Info("Starting processing", + w.log.Debug("Starting processing", zap.Int("items", len(items)), ) diff --git a/router/core/cache_warmup_manifest.go b/router/core/cache_warmup_manifest.go new file mode 100644 index 0000000000..0a886da6df --- /dev/null +++ b/router/core/cache_warmup_manifest.go @@ -0,0 +1,47 @@ +package core + +import ( + "context" + + nodev1 
"github.com/wundergraph/cosmo/router/gen/proto/wg/cosmo/node/v1" + "github.com/wundergraph/cosmo/router/internal/persistedoperation/pqlmanifest" + "go.uber.org/zap" +) + +var _ CacheWarmupSource = (*ManifestWarmupSource)(nil) + +type ManifestWarmupSource struct { + store *pqlmanifest.Store +} + +func NewManifestWarmupSource(store *pqlmanifest.Store) *ManifestWarmupSource { + return &ManifestWarmupSource{ + store: store, + } +} + +func (s *ManifestWarmupSource) LoadItems(ctx context.Context, log *zap.Logger) ([]*nodev1.Operation, error) { + ops := s.store.AllOperations() + if len(ops) == 0 { + log.Debug("No operations in PQL manifest for warmup") + return nil, nil + } + + items := make([]*nodev1.Operation, 0, len(ops)) + for sha256Hash, body := range ops { + items = append(items, &nodev1.Operation{ + Request: &nodev1.OperationRequest{ + Query: body, + Extensions: &nodev1.Extension{ + PersistedQuery: &nodev1.PersistedQuery{ + Sha256Hash: sha256Hash, + Version: 1, + }, + }, + }, + }) + } + + log.Debug("Loaded PQL manifest operations for warmup", zap.Int("count", len(items))) + return items, nil +} diff --git a/router/core/cache_warmup_manifest_test.go b/router/core/cache_warmup_manifest_test.go new file mode 100644 index 0000000000..05aadbab9a --- /dev/null +++ b/router/core/cache_warmup_manifest_test.go @@ -0,0 +1,86 @@ +package core + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + "github.com/wundergraph/cosmo/router/internal/persistedoperation/pqlmanifest" + "go.uber.org/zap" +) + +func TestManifestWarmupSource(t *testing.T) { + t.Parallel() + + t.Run("returns nil when store has no manifest", func(t *testing.T) { + t.Parallel() + store := pqlmanifest.NewStore(zap.NewNop()) + source := NewManifestWarmupSource(store) + + items, err := source.LoadItems(context.Background(), zap.NewNop()) + require.NoError(t, err) + require.Nil(t, items) + }) + + t.Run("returns nil when manifest has no operations", func(t *testing.T) { + t.Parallel() + store := pqlmanifest.NewStore(zap.NewNop()) + store.Load(&pqlmanifest.Manifest{ + Version: 1, + Revision: "rev-1", + Operations: map[string]string{}, + }) + source := NewManifestWarmupSource(store) + + items, err := source.LoadItems(context.Background(), zap.NewNop()) + require.NoError(t, err) + require.Nil(t, items) + }) + + t.Run("returns all operations with persisted query extensions", func(t *testing.T) { + t.Parallel() + store := pqlmanifest.NewStore(zap.NewNop()) + store.Load(&pqlmanifest.Manifest{ + Version: 1, + Revision: "rev-1", + Operations: map[string]string{ + "sha256abc": "query Employees { employees { id } }", + "sha256def": "mutation CreateUser { createUser { id } }", + }, + }) + source := NewManifestWarmupSource(store) + + items, err := source.LoadItems(context.Background(), zap.NewNop()) + require.NoError(t, err) + require.Len(t, items, 2) + + // Collect items into a map for deterministic assertions (map iteration is unordered) + byHash := make(map[string]string) + for _, item := range items { + require.NotNil(t, item.Request) + require.NotNil(t, item.Request.Extensions) + require.NotNil(t, item.Request.Extensions.PersistedQuery) + require.Equal(t, int32(1), item.Request.Extensions.PersistedQuery.Version) + byHash[item.Request.Extensions.PersistedQuery.Sha256Hash] = item.Request.Query + } + + require.Equal(t, "query Employees { employees { id } }", byHash["sha256abc"]) + require.Equal(t, "mutation CreateUser { createUser { id } }", byHash["sha256def"]) + }) + + t.Run("does not include client info", func(t 
*testing.T) { + t.Parallel() + store := pqlmanifest.NewStore(zap.NewNop()) + store.Load(&pqlmanifest.Manifest{ + Version: 1, + Revision: "rev-1", + Operations: map[string]string{"hash1": "query { a }"}, + }) + source := NewManifestWarmupSource(store) + + items, err := source.LoadItems(context.Background(), zap.NewNop()) + require.NoError(t, err) + require.Len(t, items, 1) + require.Nil(t, items[0].Client) + }) +} diff --git a/router/core/graph_server.go b/router/core/graph_server.go index d9e73be51f..94baf88bc0 100644 --- a/router/core/graph_server.go +++ b/router/core/graph_server.go @@ -1450,6 +1450,56 @@ func (s *graphServer) buildGraphMux( } } + // Prewarm all persisted operations from the PQL manifest so that the first request is served from cache. + // This runs independently of the cache warmup configuration above. + manifestWarmup := s.persistedOperationsConfig.Manifest.Warmup + if manifestWarmup.Enabled && s.persistedOperationClient != nil { + if pqlStore := s.persistedOperationClient.PQLStore(); pqlStore != nil && pqlStore.IsLoaded() { + manifestProcessor := NewCacheWarmupPlanningProcessor(&CacheWarmupPlanningProcessorOptions{ + OperationProcessor: operationProcessor, + OperationPlanner: operationPlanner, + ComplexityLimits: s.securityConfiguration.ComplexityLimits, + RouterSchema: executor.RouterSchema, + TrackSchemaUsage: s.graphqlMetricsConfig.Enabled, + DisableVariablesRemapping: s.engineExecutionConfiguration.DisableVariablesRemapping, + }) + + manifestWarmupConfig := &CacheWarmupConfig{ + Log: s.logger, + Processor: manifestProcessor, + Workers: manifestWarmup.Workers, + ItemsPerSecond: manifestWarmup.ItemsPerSecond, + Timeout: manifestWarmup.Timeout, + Source: NewManifestWarmupSource(pqlStore), + } + + err = WarmupCaches(ctx, manifestWarmupConfig) + if err != nil { + s.logger.Error("Failed to warmup PQL manifest operations", zap.Error(err)) + } + + // Re-warm when the manifest is updated by the poller. + // The callback runs in a new goroutine to avoid blocking the poll loop. 
+ pqlStore.SetOnUpdate(func() { + rewarmCtx, cancel := context.WithTimeout(context.Background(), manifestWarmup.Timeout) + defer cancel() + + rewarmConfig := &CacheWarmupConfig{ + Log: s.logger, + Processor: manifestProcessor, + Workers: manifestWarmup.Workers, + ItemsPerSecond: manifestWarmup.ItemsPerSecond, + Timeout: manifestWarmup.Timeout, + Source: NewManifestWarmupSource(pqlStore), + } + + if rewarmErr := WarmupCaches(rewarmCtx, rewarmConfig); rewarmErr != nil { + s.logger.Error("Failed to re-warm PQL manifest operations after update", zap.Error(rewarmErr)) + } + }) + } + } + authorizerOptions := &CosmoAuthorizerOptions{ FieldConfigurations: opts.EngineConfig.FieldConfigurations, RejectOperationIfUnauthorized: false, diff --git a/router/core/graphql_prehandler.go b/router/core/graphql_prehandler.go index d8535e4a98..122f848cd0 100644 --- a/router/core/graphql_prehandler.go +++ b/router/core/graphql_prehandler.go @@ -629,11 +629,16 @@ func (h *PreHandler) handleOperation(req *http.Request, httpOperation *httpOpera var poNotFoundErr *persistedoperation.PersistentOperationNotFoundError if h.operationBlocker.logUnknownOperationsEnabled && errors.As(err, &poNotFoundErr) { requestContext.logger.Warn("Unknown persisted operation found", zap.String("query", operationKit.parsedOperation.Request.Query), zap.String("sha256Hash", poNotFoundErr.Sha256Hash)) - if h.operationBlocker.safelistEnabled { - span.End() - return err + // When log_unknown is enabled, ad-hoc queries whose hash doesn't match a + // persisted operation are logged above. We only allow execution to continue + // when the request includes a query body (the ad-hoc query to run) and + // safelist is not enforced. Hash-only requests without a body have nothing + // to execute, so we always return the not-found error in that case. + if !h.operationBlocker.safelistEnabled && operationKit.parsedOperation.Request.Query != "" { + err = nil } - } else { + } + if err != nil { span.End() return err } diff --git a/router/core/operation_processor.go b/router/core/operation_processor.go index dc84455e89..fafa168a94 100644 --- a/router/core/operation_processor.go +++ b/router/core/operation_processor.go @@ -1241,7 +1241,17 @@ func (o *OperationKit) generatePersistedOperationCacheKey(clientName string, ski // If there are multiple operations in the document, we need to include the operation name in the cache key _, _ = o.kit.keyGen.WriteString(o.parsedOperation.Request.OperationName) } - _, _ = o.kit.keyGen.WriteString(clientName) + manifestEnabled := o.operationProcessor.persistedOperationClient != nil && + o.operationProcessor.persistedOperationClient.ManifestEnabled() + + if !manifestEnabled { + // Non-manifest mode: include clientName since operations are per-client. + // Manifest mode: exclude clientName because manifest operations are global + // and the SHA256 hash already uniquely identifies the operation body. + // Cache entries persist across manifest reloads — removed operations are + // naturally evicted by the LRU. 
+ _, _ = o.kit.keyGen.WriteString(clientName) + } o.writeSkipIncludeCacheKeyToKeyGen(skipIncludeVariableNames) sum := o.kit.keyGen.Sum64() o.kit.keyGen.Reset() diff --git a/router/core/router.go b/router/core/router.go index 34e619e0da..6ec2dfa316 100644 --- a/router/core/router.go +++ b/router/core/router.go @@ -10,6 +10,7 @@ import ( "net/http" "net/url" "os" + "path" "sync" "time" @@ -36,6 +37,7 @@ import ( "github.com/wundergraph/cosmo/router/internal/persistedoperation/operationstorage/cdn" "github.com/wundergraph/cosmo/router/internal/persistedoperation/operationstorage/fs" "github.com/wundergraph/cosmo/router/internal/persistedoperation/operationstorage/s3" + "github.com/wundergraph/cosmo/router/internal/persistedoperation/pqlmanifest" rd "github.com/wundergraph/cosmo/router/internal/rediscloser" "github.com/wundergraph/cosmo/router/internal/retrytransport" "github.com/wundergraph/cosmo/router/internal/stringsx" @@ -526,7 +528,6 @@ func NewRouter(opts ...Option) (*Router, error) { r.engineExecutionConfiguration.Debug.EnableCacheResponseHeaders = true } - if r.securityConfiguration.DepthLimit != nil { r.logger.Warn("The security configuration field 'depth_limit' is deprecated, and will be removed. Use 'security.complexity_limits.depth' instead.") @@ -1096,7 +1097,7 @@ func (r *Router) bootstrap(ctx context.Context) error { r.staticExecutionConfig = executionConfig } - if err := r.buildClients(); err != nil { + if err := r.buildClients(ctx); err != nil { return err } @@ -1119,7 +1120,7 @@ func (r *Router) bootstrap(ctx context.Context) error { } // buildClients initializes the storage clients for persisted operations and router config. -func (r *Router) buildClients() error { +func (r *Router) buildClients(ctx context.Context) error { s3Providers := map[string]config.S3StorageProvider{} cdnProviders := map[string]config.CDNStorageProvider{} redisProviders := map[string]config.RedisStorageProvider{} @@ -1153,6 +1154,9 @@ func (r *Router) buildClients() error { fileSystemProviders[provider.ID] = provider } + // Create the storage client for persisted operations based on the configured provider. + // The same client is reused for manifest fetching when the manifest feature is enabled, + // since both features are exclusive (manifest replaces individual operation fetches). 
var pClient persistedoperation.StorageClient if !r.persistedOperationsConfig.Disabled { @@ -1165,7 +1169,7 @@ func (r *Router) buildClients() error { Logger: r.logger, }) if err != nil { - return err + return fmt.Errorf("failed to create CDN client: %w", err) } pClient = c @@ -1184,7 +1188,7 @@ func (r *Router) buildClients() error { TraceProvider: r.tracerProvider, }) if err != nil { - return err + return fmt.Errorf("failed to create S3 client: %w", err) } pClient = c @@ -1196,7 +1200,7 @@ func (r *Router) buildClients() error { ObjectPathPrefix: r.persistedOperationsConfig.Storage.ObjectPrefix, }) if err != nil { - return err + return fmt.Errorf("failed to create filesystem client: %w", err) } pClient = c @@ -1212,7 +1216,7 @@ func (r *Router) buildClients() error { Logger: r.logger, }) if err != nil { - return err + return fmt.Errorf("failed to create CDN client: %w", err) } pClient = c @@ -1251,7 +1255,69 @@ func (r *Router) buildClients() error { } } - if pClient != nil || apqClient != nil { + var pqlStore *pqlmanifest.Store + + if r.persistedOperationsConfig.Manifest.Enabled && !r.persistedOperationsConfig.Disabled { + const manifestFileName = "manifest.json" + + storageProviderID := r.persistedOperationsConfig.Storage.ProviderID + + if _, ok := fileSystemProviders[storageProviderID]; ok { + return fmt.Errorf("filesystem storage provider %q is not supported for PQL manifest; use S3 or CDN instead", storageProviderID) + } + + if storageProviderID != "" { + // An explicit storage provider is configured — read the manifest once at startup. + objectPrefix := r.persistedOperationsConfig.Storage.ObjectPrefix + objectPath := manifestFileName + if objectPrefix != "" { + objectPath = path.Join(objectPrefix, manifestFileName) + } + + manifest, err := pClient.ReadManifest(ctx, objectPath) + if err != nil { + return fmt.Errorf("failed to fetch PQL manifest from storage provider %q: %w", + storageProviderID, err) + } + + pqlStore = pqlmanifest.NewStore(r.logger) + pqlStore.Load(manifest) + r.logger.Info("Loaded PQL manifest from storage provider", + zap.String("provider_id", storageProviderID), + zap.Int("operations", pqlStore.OperationCount()), + ) + } else { + // No storage provider configured — fetch manifest from Cosmo CDN and poll for updates. + if r.graphApiToken == "" { + return errors.New("graph token is required for PQL manifest") + } + + fetcher, err := pqlmanifest.NewFetcher(r.cdnConfig.URL, r.graphApiToken, r.logger) + if err != nil { + return fmt.Errorf("failed to create PQL manifest fetcher: %w", err) + } + + poller := pqlmanifest.NewPoller( + fetcher, + r.persistedOperationsConfig.Manifest.PollInterval, + r.persistedOperationsConfig.Manifest.PollJitter, + r.logger, + ) + + if err := poller.FetchInitial(ctx); err != nil { + return fmt.Errorf("failed to fetch initial PQL manifest: %w", err) + } + + go poller.Poll(ctx) + + pqlStore = fetcher.Store() + } + + // Manifest is authoritative — individual operation fetches are not needed. 
+ pClient = nil + } + + if pClient != nil || apqClient != nil || pqlStore != nil { // For backwards compatibility with cdn config field cacheSize := r.persistedOperationsConfig.Cache.Size.Uint64() if cacheSize <= 0 { @@ -1263,6 +1329,7 @@ func (r *Router) buildClients() error { Logger: r.logger, ProviderClient: pClient, ApqClient: apqClient, + PQLStore: pqlStore, }) if err != nil { return err diff --git a/router/internal/persistedoperation/README.md b/router/internal/persistedoperation/README.md index 7760a97463..d780333150 100644 --- a/router/internal/persistedoperation/README.md +++ b/router/internal/persistedoperation/README.md @@ -1,17 +1,38 @@ # Persisted Operations -Persisted operations are stored queries, which can be executed just by providing the sha256hash of the operation to the router. This is useful for multiple purposes, including: -* large/frequently requested queries, which can be stored to avoid sending them over the network multiple times -* for security purposes, where a consumer can specify the specific operations which can be run, and the router can verify that the operation is one of the allowed ones +Persisted operations are stored queries, which can be executed just by providing the sha256hash of the operation to the router. This is useful for multiple purposes, including: -Specifically for those two purposes, we enable two different methods of storing persisted operations: -1. **Persisted Operation Files** - This operation, documented [here](https://cosmo-docs.wundergraph.com/router/persisted-operations), allows users to store persisted operations in files in a CDN/S3 bucket, which are then loaded by the router. This is both useful for storing large queries, as well as by reducing the router's attack surface by only allowing registered operations -2. **Automatic Persisted Queries** - This setting allows users to automatically cache queries that are sent, as long as they are sent together with their sha256hash. This is a useful performance optimizer, as it allows the router to cache queries that are frequently requested, without the need to manually store them in a file. +- large/frequently requested queries, which can be stored to avoid sending them over the network multiple times +- for security purposes, where a consumer can specify the specific operations which can be run, and the router can verify that the operation is one of the allowed ones -These two uses can exist in concert - users can save a number of particular operations in persisted operation files, and then use automatic persisted queries to cache the rest of the queries that are sent to the router. +Specifically for those purposes, we enable three different methods of storing persisted operations: + +1. **Persisted Operation Files** - This method, documented [here](https://cosmo-docs.wundergraph.com/router/persisted-queries/persisted-operations), allows users to store persisted operations in files in a CDN/S3 bucket, which are then loaded by the router individually per request. This is useful for storing large queries and for only allowing registered operations. +2. **PQL Manifest (Recommended)** - When enabled, the router loads a single JSON manifest (`manifest.json`) containing all persisted operations at startup. The manifest uses the same `storage` config as persisted operations (the two modes are exclusive; the manifest replaces individual operation fetches). When a `storage.provider_id` is configured, the manifest is loaded from that provider (S3 or CDN; filesystem providers are not supported) at startup (the file is resolved as `<object_prefix>/manifest.json`).
When no storage provider is configured, the router fetches from the Cosmo CDN and polls for updates periodically. Operations are resolved entirely in-memory with zero per-request network overhead. When the manifest is enabled, it is authoritative — no fallback occurs for individual operations. We suggest using the PQL Manifest as the preferred method for persisted operations. See the `pqlmanifest` subpackage. +3. **Automatic Persisted Queries** - This setting allows users to automatically cache queries that are sent, as long as they are sent together with their sha256hash. This is a useful performance optimizer, as it allows the router to cache queries that are frequently requested, without the need to manually store them in a file. + +These methods can exist in concert — for example, users can enable the PQL manifest for zero-latency lookups and use APQ to cache ad-hoc queries. + +## Lookup Order + +When a persisted operation request arrives, the router resolves it in this order: + +1. **APQ cache** — if APQ is enabled and the hash is cached, use it +2. **In-memory normalization cache** — if the operation was previously resolved and cached locally +3. **PQL manifest** — if a manifest is loaded, look up the hash in-memory. If found, return the body. If not found, the manifest is authoritative: the operation does not exist (no CDN fallback) +4. **CDN/S3/FS fallback** — only when the manifest is **not** enabled, fetch the individual operation file from CDN, S3, or the filesystem ## Flows -1. **Persisted Operations, no APQ** → In this scenario, the router will only execute queries that are stored in persisted operation files. If a query is not found in the persisted operation files, the router will return an error if a user tries calling a `persisted operation` with an unknown sha. After the query is planned, the router will cache the normalized query in the local persisted operation cache. -1. **APQ, No Persisted Operations** → In this scenario, if a `persisted_operation` request is sent, the router will first check if there is an APQ cached that matches. If a query is found, the router will execute the query. If a query is not found, the router will look if a query was submitted together with the persisted operation hash. If so, it will execute that query and save it in the cache for the future, and if not, the router will return an error. -1. **No APQ, No Persisted Operations** → If a persisted operation is sent, the router will return an error, as there are no persisted operations stored. Even if a query is sent, the router will still error because APQ isn't enabled. -1. **APQ and Persisted Operations** → In this scenario, the router will first check if the query was stored as an APQ. If it is, the router will execute the query. If it is not, the router will check the persistent query files. If the query is found, the router will execute the query. If the query is not found, the router will check if the query was sent together with the persisted operation hash. If it was, the router will execute the query and save it in the APQ cache for the future. If it was not, the router will return an error. + +> **Hash validation prerequisite:** When a request includes both a query body and `extensions.persistedQuery.sha256Hash`, the router validates the body against the hash and rejects the request if they do not match — _before_ any APQ or persisted-operation lookup occurs. See `router/core/graphql_prehandler.go` (`handleOperation`). + +1. 
**Persisted Operations (CDN), no APQ** → The router fetches individual operations from CDN/S3 on demand. If a query is not found, the router returns an error. After the query is planned, the router caches the normalized query in the local persisted operation cache. +1. **PQL Manifest, no APQ** → The router loads the manifest (`manifest.json`) at startup from the configured storage provider (S3 or CDN). When no storage provider is configured, the router fetches from the Cosmo CDN and polls for updates. When a storage provider is configured, the manifest is loaded once at startup. All lookups are in-memory. Unknown hashes are rejected immediately without any network call. +1. **APQ, No Persisted Operations** → If a `persisted_operation` request is sent, the router checks the APQ cache first. If not found, it checks if a query body was sent with the request. If so, it validates the hash against the body, then executes and caches it. Otherwise, the router returns an error. +1. **No APQ, No Persisted Operations** → If a persisted operation is sent, the router returns an error, as there are no persisted operations stored. Even if a query is sent, the router will still error because APQ isn't enabled. +1. **APQ and Persisted Operations** → The router validates any included query body against the hash, then checks APQ first, then the PQL manifest or CDN (depending on config), then checks if a query body was attached. First match wins. + +## Enforcement Modes + +- **safelist** — when enabled, only operations found in persisted storage (manifest or CDN) are allowed. Ad-hoc queries are rejected with `PersistedQueryNotFound`. +- **log_unknown** — when enabled, ad-hoc queries that are not in persisted storage are logged but still allowed. Combined with safelist, unknown queries are both logged and rejected.
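Editor's note: the polling the README describes is a conditional GET; the manifest endpoint answers 304 Not Modified when the `If-None-Match` header still matches the current revision, which is also how the test CDN server in this diff behaves. The sketch below shows that handshake under stated assumptions (plain bearer token, placeholder URL, simplified return shape); it is illustrative and not the router's actual `pqlmanifest.Fetcher`.

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

// fetchManifestIfChanged performs a conditional GET against a manifest URL.
// When the supplied ETag still matches, the server answers 304 Not Modified and
// no body is downloaded; otherwise the new body and ETag are returned.
// URL, token, and return shape are illustrative, not the router's actual fetcher.
func fetchManifestIfChanged(url, token, etag string) (body []byte, newETag string, changed bool, err error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, etag, false, err
	}
	req.Header.Set("Authorization", "Bearer "+token)
	if etag != "" {
		req.Header.Set("If-None-Match", etag)
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, etag, false, err
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusNotModified:
		// Revision unchanged, keep the manifest we already have.
		return nil, etag, false, nil
	case http.StatusOK:
		body, err = io.ReadAll(resp.Body)
		if err != nil {
			return nil, etag, false, err
		}
		return body, resp.Header.Get("ETag"), true, nil
	default:
		return nil, etag, false, fmt.Errorf("unexpected status %d fetching manifest", resp.StatusCode)
	}
}

func main() {
	// Placeholder endpoint and token, for illustration only.
	body, etag, changed, err := fetchManifestIfChanged("https://cosmo-cdn.example/org/graph/operations/manifest.json", "graph-api-token", "")
	if err != nil {
		fmt.Println("fetch manifest:", err)
		return
	}
	if changed {
		fmt.Printf("downloaded %d bytes, etag=%s\n", len(body), etag)
	}
}
```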
diff --git a/router/internal/persistedoperation/client.go b/router/internal/persistedoperation/client.go index 2028f67941..d204161d5e 100644 --- a/router/internal/persistedoperation/client.go +++ b/router/internal/persistedoperation/client.go @@ -7,6 +7,7 @@ import ( "github.com/wundergraph/cosmo/router/internal/persistedoperation/apq" "github.com/wundergraph/cosmo/router/internal/persistedoperation/operationstorage" + "github.com/wundergraph/cosmo/router/internal/persistedoperation/pqlmanifest" "go.uber.org/zap" ) @@ -26,6 +27,7 @@ func (e PersistentOperationNotFoundError) Error() string { type StorageClient interface { PersistedOperation(ctx context.Context, clientName string, sha256Hash string) ([]byte, error) + ReadManifest(ctx context.Context, objectPath string) (*pqlmanifest.Manifest, error) Close() } @@ -37,12 +39,14 @@ type Options struct { ProviderClient StorageClient ApqClient apq.Client + PQLStore *pqlmanifest.Store } type Client struct { cache *operationstorage.OperationsCache providerClient StorageClient apqClient apq.Client + pqlStore *pqlmanifest.Store } func NewClient(opts *Options) (*Client, error) { @@ -57,11 +61,12 @@ func NewClient(opts *Options) (*Client, error) { providerClient: opts.ProviderClient, cache: cache, apqClient: opts.ApqClient, + pqlStore: opts.PQLStore, }, nil } func (c *Client) PersistedOperation(ctx context.Context, clientName string, sha256Hash string) ([]byte, bool, error) { - if c.apqClient != nil && c.apqClient.Enabled() { + if c.APQEnabled() { resp, apqErr := c.apqClient.PersistedOperation(ctx, clientName, sha256Hash) if len(resp) > 0 || apqErr != nil { return resp, true, apqErr @@ -72,9 +77,24 @@ func (c *Client) PersistedOperation(ctx context.Context, clientName string, sha2 return data, false, nil } + // PQL manifest check (local, no network) + if c.pqlStore != nil && c.pqlStore.IsLoaded() { + if body, found := c.pqlStore.LookupByHash(sha256Hash); found { + return body, false, nil + } + // Manifest is authoritative — operation not found + if c.APQEnabled() { + return nil, true, nil + } + return nil, false, &PersistentOperationNotFoundError{ + ClientName: clientName, Sha256Hash: sha256Hash, + } + } + if c.providerClient == nil { - // This can happen if we are using APQ client, without any persisted operation client. Otherwise, we should have a provider client and shouldn't reach here. - return nil, c.apqClient != nil, nil + // This can happen if we are using APQ client without any persisted operation client, + // or if the PQL manifest is enabled but hasn't loaded yet (e.g. initial fetch failed). + return nil, c.APQEnabled(), nil } var ( @@ -107,6 +127,16 @@ func (c *Client) APQEnabled() bool { return c.apqClient != nil && c.apqClient.Enabled() } +// ManifestEnabled returns whether a PQL manifest is configured and loaded. +func (c *Client) ManifestEnabled() bool { + return c.pqlStore != nil && c.pqlStore.IsLoaded() +} + +// PQLStore returns the PQL manifest store, or nil if no manifest is configured. 
+func (c *Client) PQLStore() *pqlmanifest.Store { + return c.pqlStore +} + func (c *Client) Close() { if c.providerClient != nil { c.providerClient.Close() diff --git a/router/internal/persistedoperation/operationstorage/cdn/client.go b/router/internal/persistedoperation/operationstorage/cdn/client.go index d16f7e6bf6..02a7d6906d 100644 --- a/router/internal/persistedoperation/operationstorage/cdn/client.go +++ b/router/internal/persistedoperation/operationstorage/cdn/client.go @@ -13,6 +13,7 @@ import ( "github.com/wundergraph/cosmo/router/internal/httpclient" "github.com/wundergraph/cosmo/router/internal/jwt" "github.com/wundergraph/cosmo/router/internal/persistedoperation" + "github.com/wundergraph/cosmo/router/internal/persistedoperation/pqlmanifest" "go.opentelemetry.io/otel/codes" semconv12 "go.opentelemetry.io/otel/semconv/v1.12.0" semconv "go.opentelemetry.io/otel/semconv/v1.17.0" @@ -24,9 +25,12 @@ type Options struct { Logger *zap.Logger } -var _ persistedoperation.StorageClient = (*client)(nil) +// Deprecated: The CDN-based persisted operation Client is deprecated. +// The router now downloads all operations at once via the PQL manifest, avoiding +// per-request CDN latency. This Client is kept for backward compatibility. +var _ persistedoperation.StorageClient = (*Client)(nil) -type client struct { +type Client struct { cdnURL *url.URL authenticationToken string // federatedGraphID is the ID of the federated graph that was obtained @@ -37,9 +41,48 @@ type client struct { organizationID string httpClient *http.Client logger *zap.Logger + fetcher *pqlmanifest.Fetcher } -func (cdn *client) PersistedOperation(ctx context.Context, clientName string, sha256Hash string) ([]byte, error) { +// NewClient creates a new CDN Client. URL is the URL of the CDN. 
+// Token is the token used to authenticate with the CDN, the same as the GRAPH_API_TOKEN +func NewClient(endpoint string, token string, opts Options) (*Client, error) { + u, err := url.Parse(endpoint) + if err != nil { + return nil, fmt.Errorf("invalid CDN URL %q: %w", endpoint, err) + } + + if opts.Logger == nil { + opts.Logger = zap.NewNop() + } + + claims, err := jwt.ExtractFederatedGraphTokenClaims(token) + if err != nil { + return nil, err + } + + logger := opts.Logger.With( + zap.String("component", "persisted_operations_client"), + zap.String("url", endpoint), + ) + + fetcher, err := pqlmanifest.NewFetcher(endpoint, token, logger) + if err != nil { + return nil, fmt.Errorf("failed to create manifest fetcher: %w", err) + } + + return &Client{ + cdnURL: u, + authenticationToken: token, + federatedGraphID: url.PathEscape(claims.FederatedGraphID), + organizationID: url.PathEscape(claims.OrganizationID), + httpClient: httpclient.NewRetryableHTTPClient(logger), + logger: logger, + fetcher: fetcher, + }, nil +} + +func (cdn *Client) PersistedOperation(ctx context.Context, clientName string, sha256Hash string) ([]byte, error) { content, err := cdn.persistedOperation(ctx, clientName, sha256Hash) if err != nil { return nil, err @@ -48,7 +91,7 @@ func (cdn *client) PersistedOperation(ctx context.Context, clientName string, sh return content, nil } -func (cdn *client) persistedOperation(ctx context.Context, clientName string, sha256Hash string) ([]byte, error) { +func (cdn *Client) persistedOperation(ctx context.Context, clientName string, sha256Hash string) ([]byte, error) { span := trace.SpanFromContext(ctx) @@ -70,9 +113,7 @@ func (cdn *client) persistedOperation(ctx context.Context, clientName string, sh semconv12.HTTPHostKey.String(req.Host), ) - req.Header.Set("Content-Type", "application/json; charset=UTF-8") - req.Header.Add("Authorization", "Bearer "+cdn.authenticationToken) - req.Header.Set("Accept-Encoding", "gzip") + cdn.setCDNHeaders(req) resp, err := cdn.httpClient.Do(req) if err != nil { @@ -102,18 +143,11 @@ func (cdn *client) persistedOperation(ctx context.Context, clientName string, sh return nil, fmt.Errorf("unexpected status code when loading persisted operation, statusCode: %d", resp.StatusCode) } - var reader io.Reader = resp.Body - - if resp.Header.Get("Content-Encoding") == "gzip" { - r, err := gzip.NewReader(resp.Body) - if err != nil { - return nil, errors.New("could not create gzip reader. " + err.Error()) - } - defer func() { - _ = r.Close() - }() - reader = r + reader, cleanup, err := gzipAwareReader(resp) + if err != nil { + return nil, err } + defer cleanup() body, err := io.ReadAll(reader) if err != nil { @@ -129,36 +163,42 @@ func (cdn *client) persistedOperation(ctx context.Context, clientName string, sh return []byte(po.Body), nil } -// NewClient creates a new CDN client. URL is the URL of the CDN. -// Token is the token used to authenticate with the CDN, the same as the GRAPH_API_TOKEN -func NewClient(endpoint string, token string, opts Options) (*client, error) { - u, err := url.Parse(endpoint) - if err != nil { - return nil, fmt.Errorf("invalid CDN URL %q: %w", endpoint, err) - } +// setCDNHeaders sets the common headers for CDN requests. 
+func (cdn *Client) setCDNHeaders(req *http.Request) { + req.Header.Set("Content-Type", "application/json; charset=UTF-8") + req.Header.Add("Authorization", "Bearer "+cdn.authenticationToken) + req.Header.Set("Accept-Encoding", "gzip") +} - if opts.Logger == nil { - opts.Logger = zap.NewNop() +// gzipAwareReader returns a reader that transparently decompresses the response body +// if the response is gzip-encoded, along with a cleanup function that must be deferred. +func gzipAwareReader(resp *http.Response) (io.Reader, func(), error) { + if resp.Header.Get("Content-Encoding") == "gzip" { + r, err := gzip.NewReader(resp.Body) + if err != nil { + return nil, nil, fmt.Errorf("could not create gzip reader: %w", err) + } + return r, func() { _ = r.Close() }, nil } + return resp.Body, func() {}, nil +} - claims, err := jwt.ExtractFederatedGraphTokenClaims(token) +// ReadManifest fetches the PQL manifest from the CDN, delegating to the manifest Fetcher. +// The objectPath parameter is unused — the Fetcher constructs the path from JWT claims. +func (cdn *Client) ReadManifest(ctx context.Context, _ string) (*pqlmanifest.Manifest, error) { + manifest, _, err := cdn.fetcher.Fetch(ctx, "") if err != nil { return nil, err } + if manifest == nil { + return nil, fmt.Errorf("no manifest returned from CDN") + } + return manifest, nil +} - logger := opts.Logger.With( - zap.String("component", "persisted_operations_client"), - zap.String("url", endpoint), - ) - - return &client{ - cdnURL: u, - authenticationToken: token, - federatedGraphID: url.PathEscape(claims.FederatedGraphID), - organizationID: url.PathEscape(claims.OrganizationID), - httpClient: httpclient.NewRetryableHTTPClient(logger), - logger: logger, - }, nil +// Fetcher returns the manifest fetcher for use with polling. 
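+// A minimal sketch of the intended wiring, for illustration only (assumes the
+// Poller type defined in the pqlmanifest package later in this change, and
+// caller-supplied ctx, pollInterval, pollJitter, and logger values):
+//
+//	poller := pqlmanifest.NewPoller(client.Fetcher(), pollInterval, pollJitter, logger)
+//	if err := poller.FetchInitial(ctx); err != nil {
+//		// decide whether to fail startup or continue without a manifest
+//	}
+//	go poller.Poll(ctx)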
+func (cdn *Client) Fetcher() *pqlmanifest.Fetcher { + return cdn.fetcher } -func (cdn *client) Close() {} +func (cdn *Client) Close() {} diff --git a/router/internal/persistedoperation/operationstorage/fs/client.go b/router/internal/persistedoperation/operationstorage/fs/client.go index 6e7181d721..d808c11c33 100644 --- a/router/internal/persistedoperation/operationstorage/fs/client.go +++ b/router/internal/persistedoperation/operationstorage/fs/client.go @@ -8,6 +8,7 @@ import ( "path/filepath" "github.com/wundergraph/cosmo/router/internal/persistedoperation" + "github.com/wundergraph/cosmo/router/internal/persistedoperation/pqlmanifest" ) type client struct { @@ -69,4 +70,8 @@ func (c client) persistedOperation(clientName string, sha256Hash string) ([]byte return []byte(po.Body), nil } +func (c client) ReadManifest(_ context.Context, _ string) (*pqlmanifest.Manifest, error) { + return nil, fmt.Errorf("filesystem storage provider does not support reading manifests; use S3 or CDN instead") +} + func (c client) Close() {} diff --git a/router/internal/persistedoperation/operationstorage/s3/client.go b/router/internal/persistedoperation/operationstorage/s3/client.go index 55e8bbfa6b..ebdbccc6ee 100644 --- a/router/internal/persistedoperation/operationstorage/s3/client.go +++ b/router/internal/persistedoperation/operationstorage/s3/client.go @@ -10,6 +10,7 @@ import ( "github.com/minio/minio-go/v7" "github.com/minio/minio-go/v7/pkg/credentials" "github.com/wundergraph/cosmo/router/internal/persistedoperation" + "github.com/wundergraph/cosmo/router/internal/persistedoperation/pqlmanifest" sdktrace "go.opentelemetry.io/otel/sdk/trace" "go.opentelemetry.io/otel/trace" ) @@ -108,4 +109,22 @@ func (c Client) persistedOperation(ctx context.Context, clientName, sha256Hash s return []byte(po.Body), nil } +// ReadManifest fetches and parses a PQL manifest from S3 at the given object path. +func (c Client) ReadManifest(ctx context.Context, objectPath string) (*pqlmanifest.Manifest, error) { + reader, err := c.client.GetObject(ctx, c.options.BucketName, objectPath, minio.GetObjectOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get manifest from S3: %w", err) + } + defer func() { + _ = reader.Close() + }() + + data, err := io.ReadAll(reader) + if err != nil { + return nil, fmt.Errorf("failed to read manifest from S3: %w", err) + } + + return pqlmanifest.ParseManifest(data) +} + func (c Client) Close() {} diff --git a/router/internal/persistedoperation/pqlmanifest/fetcher.go b/router/internal/persistedoperation/pqlmanifest/fetcher.go new file mode 100644 index 0000000000..23937301fe --- /dev/null +++ b/router/internal/persistedoperation/pqlmanifest/fetcher.go @@ -0,0 +1,145 @@ +package pqlmanifest + +import ( + "compress/gzip" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + + "github.com/wundergraph/cosmo/router/internal/httpclient" + "github.com/wundergraph/cosmo/router/internal/jwt" + "go.uber.org/zap" +) + +type Fetcher struct { + cdnURL *url.URL + authenticationToken string + // federatedGraphID is the ID of the federated graph that was obtained + // from the token, already url-escaped + federatedGraphID string + // organizationID is the ID of the organization for this graph that was obtained + // from the token, already url-escaped + organizationID string + httpClient *http.Client + logger *zap.Logger + store *Store +} + +// NewFetcher creates a new manifest fetcher. 
It reuses JWT extraction and HTTP client +// setup patterns from the CDN persisted operations client. +func NewFetcher(endpoint, token string, logger *zap.Logger) (*Fetcher, error) { + u, err := url.Parse(endpoint) + if err != nil { + return nil, fmt.Errorf("invalid CDN URL %q: %w", endpoint, err) + } + + claims, err := jwt.ExtractFederatedGraphTokenClaims(token) + if err != nil { + return nil, err + } + + if logger == nil { + logger = zap.NewNop() + } + + logger = logger.With( + zap.String("component", "pql_manifest_fetcher"), + zap.String("url", endpoint), + ) + + return &Fetcher{ + cdnURL: u, + authenticationToken: token, + federatedGraphID: url.PathEscape(claims.FederatedGraphID), + organizationID: url.PathEscape(claims.OrganizationID), + httpClient: httpclient.NewRetryableHTTPClient(logger), + logger: logger, + store: NewStore(logger), + }, nil +} + +// Store returns the underlying manifest store for read access (lookups, revision). +func (f *Fetcher) Store() *Store { + return f.store +} + +// Fetch downloads the manifest from the CDN. It GETs /{orgId}/{fedGraphId}/operations/manifest.json +// with Bearer auth, using If-None-Match for conditional requests. The CDN returns 304 Not Modified +// when the ETag matches, avoiding a full download. Returns (manifest, changed, err). +func (f *Fetcher) Fetch(ctx context.Context, currentRevision string) (*Manifest, bool, error) { + manifestPath := fmt.Sprintf("/%s/%s/operations/manifest.json", f.organizationID, f.federatedGraphID) + manifestURL := f.cdnURL.ResolveReference(&url.URL{Path: manifestPath}) + + req, err := http.NewRequestWithContext(ctx, "GET", manifestURL.String(), nil) + if err != nil { + return nil, false, err + } + + req.Header.Set("Authorization", "Bearer "+f.authenticationToken) + req.Header.Set("Accept-Encoding", "gzip") + if currentRevision != "" { + req.Header.Set("If-None-Match", `"`+currentRevision+`"`) + } + + resp, err := f.httpClient.Do(req) + if err != nil { + return nil, false, err + } + defer func() { + _ = resp.Body.Close() + }() + + if resp.StatusCode == http.StatusNotModified { + return nil, false, nil + } + + if resp.StatusCode != http.StatusOK { + if resp.StatusCode == http.StatusNotFound { + return nil, false, errors.New("PQL manifest not found on CDN") + } + if resp.StatusCode == http.StatusUnauthorized { + return nil, false, errors.New("could not authenticate against CDN") + } + if resp.StatusCode == http.StatusBadRequest { + return nil, false, errors.New("bad request") + } + return nil, false, fmt.Errorf("unexpected status code when loading PQL manifest, statusCode: %d", resp.StatusCode) + } + + var reader io.Reader = resp.Body + + if resp.Header.Get("Content-Encoding") == "gzip" { + r, err := gzip.NewReader(resp.Body) + if err != nil { + return nil, false, fmt.Errorf("could not create gzip reader: %w", err) + } + defer func() { + _ = r.Close() + }() + reader = r + } + + body, err := io.ReadAll(reader) + if err != nil { + return nil, false, fmt.Errorf("could not read response body: %w", err) + } + + if len(body) == 0 { + return nil, false, errors.New("empty response body") + } + + var manifest Manifest + if err := json.Unmarshal(body, &manifest); err != nil { + return nil, false, fmt.Errorf("could not unmarshal PQL manifest: %w", err) + } + + if err := validateManifest(&manifest); err != nil { + return nil, false, fmt.Errorf("invalid PQL manifest: %w", err) + } + + return &manifest, true, nil +} diff --git a/router/internal/persistedoperation/pqlmanifest/fetcher_test.go 
b/router/internal/persistedoperation/pqlmanifest/fetcher_test.go new file mode 100644 index 0000000000..e46631e68b --- /dev/null +++ b/router/internal/persistedoperation/pqlmanifest/fetcher_test.go @@ -0,0 +1,210 @@ +package pqlmanifest + +import ( + "context" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "net/url" + "testing" + + "github.com/stretchr/testify/require" + "go.uber.org/zap" +) + +func newTestFetcher(serverURL string) *Fetcher { + u, _ := url.Parse(serverURL) + return &Fetcher{ + cdnURL: u, + authenticationToken: "test-token", + federatedGraphID: "graph-id", + organizationID: "org-id", + httpClient: &http.Client{}, + logger: zap.NewNop(), + store: NewStore(zap.NewNop()), + } +} + +// mustMarshalManifest marshals a Manifest to JSON, panicking on error. +func mustMarshalManifest(m *Manifest) []byte { + data, err := json.Marshal(m) + if err != nil { + panic(err) + } + return data +} + +// newETagCDNHandler returns an http.Handler that serves a manifest with ETag support. +// It returns 304 when If-None-Match matches the manifest's revision. +func newETagCDNHandler(m *Manifest) http.Handler { + data := mustMarshalManifest(m) + etag := `"` + m.Revision + `"` + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Header.Get("If-None-Match") == etag { + w.Header().Set("ETag", etag) + w.WriteHeader(http.StatusNotModified) + return + } + w.Header().Set("Content-Type", "application/json") + w.Header().Set("ETag", etag) + w.Write(data) + }) +} + +func TestFetch_SendsIfNoneMatchHeader(t *testing.T) { + t.Parallel() + var receivedHeaders http.Header + var receivedMethod string + var receivedBody []byte + + m := &Manifest{ + Version: 1, + Revision: "rev-123", + GeneratedAt: "2025-01-01T00:00:00Z", + Operations: map[string]string{"hash1": "query { a }"}, + } + data := mustMarshalManifest(m) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + receivedHeaders = r.Header + receivedMethod = r.Method + receivedBody, _ = io.ReadAll(r.Body) + w.Header().Set("Content-Type", "application/json") + w.Header().Set("ETag", `"rev-123"`) + w.Write(data) + })) + defer server.Close() + + f := newTestFetcher(server.URL) + result, changed, err := f.Fetch(context.Background(), "rev-123") + + require.NoError(t, err) + require.True(t, changed) + require.NotNil(t, result) + require.Equal(t, m.Revision, result.Revision) + require.Equal(t, `"rev-123"`, receivedHeaders.Get("If-None-Match")) + require.Equal(t, "GET", receivedMethod) + require.Empty(t, receivedBody, "GET request should have no body") +} + +func TestFetch_NoIfNoneMatchOnFirstRequest(t *testing.T) { + t.Parallel() + var receivedHeaders http.Header + + m := &Manifest{ + Version: 1, + Revision: "rev-1", + GeneratedAt: "2025-01-01T00:00:00Z", + Operations: map[string]string{"hash1": "query { a }"}, + } + + server := httptest.NewServer(newETagCDNHandler(m)) + defer server.Close() + + f := newTestFetcher(server.URL) + + // Wrap to capture headers + var origHandler http.Handler = server.Config.Handler + server.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + receivedHeaders = r.Header + origHandler.ServeHTTP(w, r) + }) + + result, changed, err := f.Fetch(context.Background(), "") + + require.NoError(t, err) + require.True(t, changed) + require.NotNil(t, result) + require.Equal(t, "", receivedHeaders.Get("If-None-Match")) +} + +func TestFetch_Handles304Response(t *testing.T) { + t.Parallel() + server := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotModified) + })) + defer server.Close() + + f := newTestFetcher(server.URL) + result, changed, err := f.Fetch(context.Background(), "rev-123") + + require.NoError(t, err) + require.False(t, changed) + require.Nil(t, result) +} + +func TestFetch_Handles200WithManifest(t *testing.T) { + t.Parallel() + m := &Manifest{ + Version: 1, + Revision: "rev-456", + GeneratedAt: "2025-01-01T00:00:00Z", + Operations: map[string]string{"hash1": "query { hello }"}, + } + + server := httptest.NewServer(newETagCDNHandler(m)) + defer server.Close() + + f := newTestFetcher(server.URL) + result, changed, err := f.Fetch(context.Background(), "rev-123") + + require.NoError(t, err) + require.True(t, changed) + require.NotNil(t, result) + require.Equal(t, m.Revision, result.Revision) + require.Equal(t, m.Operations["hash1"], result.Operations["hash1"]) +} + +func TestFetch_ETagRoundTrip(t *testing.T) { + t.Parallel() + m := &Manifest{ + Version: 1, + Revision: "rev-rt", + GeneratedAt: "2025-01-01T00:00:00Z", + Operations: map[string]string{"h1": "query { a }"}, + } + + server := httptest.NewServer(newETagCDNHandler(m)) + defer server.Close() + + f := newTestFetcher(server.URL) + + // First fetch: no revision, should get full manifest + result, changed, err := f.Fetch(context.Background(), "") + require.NoError(t, err) + require.True(t, changed) + require.NotNil(t, result) + require.Equal(t, m.Revision, result.Revision) + + // Second fetch: send revision back, should get 304 + result2, changed2, err2 := f.Fetch(context.Background(), result.Revision) + require.NoError(t, err2) + require.False(t, changed2) + require.Nil(t, result2) +} + +func TestFetch_UsesGETMethod(t *testing.T) { + t.Parallel() + var receivedMethod string + + m := &Manifest{ + Version: 1, + Revision: "rev-1", + Operations: map[string]string{}, + } + data := mustMarshalManifest(m) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + receivedMethod = r.Method + w.Header().Set("Content-Type", "application/json") + w.Write(data) + })) + defer server.Close() + + f := newTestFetcher(server.URL) + _, _, err := f.Fetch(context.Background(), "") + + require.NoError(t, err) + require.Equal(t, "GET", receivedMethod) +} diff --git a/router/internal/persistedoperation/pqlmanifest/poller.go b/router/internal/persistedoperation/pqlmanifest/poller.go new file mode 100644 index 0000000000..b85be2ae6f --- /dev/null +++ b/router/internal/persistedoperation/pqlmanifest/poller.go @@ -0,0 +1,85 @@ +package pqlmanifest + +import ( + "context" + "math/rand" + "time" + + "go.uber.org/zap" +) + +type Poller struct { + fetcher *Fetcher + pollInterval time.Duration + pollJitter time.Duration + logger *zap.Logger +} + +func NewPoller(fetcher *Fetcher, pollInterval, pollJitter time.Duration, logger *zap.Logger) *Poller { + if pollJitter <= 0 { + pollJitter = 5 * time.Second + } + if pollInterval <= 0 { + pollInterval = 10 * time.Second + } + if logger == nil { + logger = zap.NewNop() + } + return &Poller{ + fetcher: fetcher, + pollInterval: pollInterval, + pollJitter: pollJitter, + logger: logger, + } +} + +// FetchInitial performs a blocking initial fetch, called at startup. 
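+// On success (and when the CDN returns a manifest) the manifest is loaded into the
+// fetcher's Store; on error the store is left untouched, so the caller can decide
+// whether to fail startup or continue with an empty store.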
+func (p *Poller) FetchInitial(ctx context.Context) error { + manifest, changed, err := p.fetcher.Fetch(ctx, "") + if err != nil { + return err + } + + if changed && manifest != nil { + p.fetcher.Store().Load(manifest) + p.logger.Info("Loaded initial PQL manifest", + zap.String("revision", manifest.Revision), + zap.Int("operation_count", len(manifest.Operations)), + ) + } + + return nil +} + +// Poll runs a background goroutine loop that periodically fetches the manifest. +// It sleeps for pollInterval + random jitter, fetches, and if changed updates the store. +// It exits when ctx is cancelled. +func (p *Poller) Poll(ctx context.Context) { + store := p.fetcher.Store() + for { + jitter := time.Duration(rand.Int63n(int64(p.pollJitter + 1))) + sleepDuration := p.pollInterval + jitter + + select { + case <-ctx.Done(): + return + case <-time.After(sleepDuration): + } + + currentRevision := store.Revision() + manifest, changed, err := p.fetcher.Fetch(ctx, currentRevision) + if err != nil { + p.logger.Warn("Failed to fetch PQL manifest", zap.Error(err)) + continue + } + + if changed && manifest != nil { + store.Load(manifest) + p.logger.Debug("Updated PQL manifest", + zap.String("revision", manifest.Revision), + zap.String("previous_revision", currentRevision), + zap.Int("operation_count", len(manifest.Operations)), + ) + } + } +} diff --git a/router/internal/persistedoperation/pqlmanifest/poller_test.go b/router/internal/persistedoperation/pqlmanifest/poller_test.go new file mode 100644 index 0000000000..8c44268c75 --- /dev/null +++ b/router/internal/persistedoperation/pqlmanifest/poller_test.go @@ -0,0 +1,188 @@ +package pqlmanifest + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.uber.org/zap" +) + +func TestPoller_FetchInitial(t *testing.T) { + t.Parallel() + m := &Manifest{ + Version: 1, + Revision: "rev-1", + GeneratedAt: "2025-01-01T00:00:00Z", + Operations: map[string]string{"h1": "query { a }"}, + } + + server := httptest.NewServer(newETagCDNHandler(m)) + defer server.Close() + + f := newTestFetcher(server.URL) + poller := NewPoller(f, 10*time.Second, 1*time.Second, zap.NewNop()) + + err := poller.FetchInitial(context.Background()) + require.NoError(t, err) + + store := f.Store() + require.True(t, store.IsLoaded()) + require.Equal(t, m.Revision, store.Revision()) + require.Equal(t, len(m.Operations), store.OperationCount()) +} + +func TestPoller_FetchInitialError(t *testing.T) { + t.Parallel() + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + })) + defer server.Close() + + f := newTestFetcher(server.URL) + poller := NewPoller(f, 10*time.Second, 1*time.Second, zap.NewNop()) + + err := poller.FetchInitial(context.Background()) + require.Error(t, err) + require.False(t, f.Store().IsLoaded()) +} + +func TestPoller_PollUpdatesManifest(t *testing.T) { + t.Parallel() + manifestV1 := &Manifest{ + Version: 1, + Revision: "rev-1", + GeneratedAt: "2025-01-01T00:00:00Z", + Operations: map[string]string{"h1": "query { a }"}, + } + manifestV2 := &Manifest{ + Version: 1, + Revision: "rev-2", + GeneratedAt: "2025-01-02T00:00:00Z", + Operations: map[string]string{"h1": "query { a }", "h2": "query { b }"}, + } + + var currentManifest atomic.Value + currentManifest.Store(manifestV1) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + m := 
currentManifest.Load().(*Manifest) + etag := `"` + m.Revision + `"` + if r.Header.Get("If-None-Match") == etag { + w.Header().Set("ETag", etag) + w.WriteHeader(http.StatusNotModified) + return + } + w.Header().Set("Content-Type", "application/json") + w.Header().Set("ETag", etag) + data, _ := json.Marshal(m) + w.Write(data) + })) + defer server.Close() + + f := newTestFetcher(server.URL) + poller := NewPoller(f, 50*time.Millisecond, 1*time.Millisecond, zap.NewNop()) + + // Initial fetch + err := poller.FetchInitial(context.Background()) + require.NoError(t, err) + + store := f.Store() + require.Equal(t, manifestV1.Revision, store.Revision()) + require.Equal(t, len(manifestV1.Operations), store.OperationCount()) + + // Start polling + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go poller.Poll(ctx) + + // Wait a few poll cycles — manifest should stay at rev-1 (304s) + time.Sleep(150 * time.Millisecond) + require.Equal(t, manifestV1.Revision, store.Revision()) + + // Update server to serve rev-2 + currentManifest.Store(manifestV2) + + // Wait for poller to pick up the change + require.Eventually(t, func() bool { + return store.Revision() == manifestV2.Revision + }, 2*time.Second, 10*time.Millisecond) + + require.Equal(t, len(manifestV2.Operations), store.OperationCount()) +} + +func TestPoller_PollStopsOnContextCancel(t *testing.T) { + t.Parallel() + var fetchCount atomic.Int32 + + m := &Manifest{ + Version: 1, + Revision: "rev-1", + GeneratedAt: "2025-01-01T00:00:00Z", + Operations: map[string]string{}, + } + data := mustMarshalManifest(m) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fetchCount.Add(1) + w.Header().Set("Content-Type", "application/json") + w.Write(data) + })) + defer server.Close() + + f := newTestFetcher(server.URL) + poller := NewPoller(f, 50*time.Millisecond, 1*time.Millisecond, zap.NewNop()) + + ctx, cancel := context.WithCancel(context.Background()) + go poller.Poll(ctx) + + // Let it poll a few times + time.Sleep(200 * time.Millisecond) + cancel() + + countAtCancel := fetchCount.Load() + // Wait and verify no more fetches happen + time.Sleep(200 * time.Millisecond) + require.Equal(t, countAtCancel, fetchCount.Load(), "poller should stop fetching after context cancel") +} + +func TestPoller_PollContinuesOnFetchError(t *testing.T) { + t.Parallel() + var requestCount atomic.Int32 + + m := &Manifest{ + Version: 1, + Revision: "rev-1", + GeneratedAt: "2025-01-01T00:00:00Z", + Operations: map[string]string{"h1": "query { a }"}, + } + data := mustMarshalManifest(m) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + count := requestCount.Add(1) + if count <= 2 { + w.WriteHeader(http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "application/json") + w.Write(data) + })) + defer server.Close() + + f := newTestFetcher(server.URL) + poller := NewPoller(f, 50*time.Millisecond, 1*time.Millisecond, zap.NewNop()) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go poller.Poll(ctx) + + store := f.Store() + require.Eventually(t, func() bool { + return store.IsLoaded() && store.Revision() == m.Revision + }, 5*time.Second, 10*time.Millisecond) +} diff --git a/router/internal/persistedoperation/pqlmanifest/store.go b/router/internal/persistedoperation/pqlmanifest/store.go new file mode 100644 index 0000000000..21f9dcc1c3 --- /dev/null +++ b/router/internal/persistedoperation/pqlmanifest/store.go @@ -0,0 
+1,137 @@ +package pqlmanifest + +import ( + "encoding/json" + "fmt" + "os" + "sync/atomic" + + "go.uber.org/zap" +) + +type Manifest struct { + Version int `json:"version"` + Revision string `json:"revision"` + GeneratedAt string `json:"generatedAt"` + Operations map[string]string `json:"operations"` // sha256 hash -> operation body +} + +type Store struct { + manifest atomic.Pointer[Manifest] + onUpdate atomic.Value // stores func() + logger *zap.Logger +} + +func NewStore(logger *zap.Logger) *Store { + return &Store{ + logger: logger, + } +} + +// SetOnUpdate registers a callback that is invoked after the manifest is updated via Load. +// The callback is called asynchronously in a new goroutine to avoid blocking the poller. +func (s *Store) SetOnUpdate(fn func()) { + s.onUpdate.Store(fn) +} + +// Load swaps the manifest atomically and invokes the onUpdate callback if set. +func (s *Store) Load(manifest *Manifest) { + s.manifest.Store(manifest) + + if fn, ok := s.onUpdate.Load().(func()); ok && fn != nil { + go fn() + } +} + +// LookupByHash performs an O(1) map lookup by sha256 hash. +func (s *Store) LookupByHash(sha256Hash string) (body []byte, found bool) { + m := s.manifest.Load() + if m == nil { + return nil, false + } + + op, ok := m.Operations[sha256Hash] + if !ok { + return nil, false + } + + return []byte(op), true +} + +// LoadFromFile reads a manifest JSON file from disk and loads it into the store. +func (s *Store) LoadFromFile(path string) error { + data, err := os.ReadFile(path) + if err != nil { + return fmt.Errorf("failed to read manifest file: %w", err) + } + + return s.LoadFromData(data) +} + +// ParseManifest parses and validates manifest JSON data. +func ParseManifest(data []byte) (*Manifest, error) { + var manifest Manifest + if err := json.Unmarshal(data, &manifest); err != nil { + return nil, fmt.Errorf("failed to parse manifest: %w", err) + } + if err := validateManifest(&manifest); err != nil { + return nil, fmt.Errorf("invalid manifest: %w", err) + } + return &manifest, nil +} + +// LoadFromData parses and validates manifest JSON data and loads it into the store. +func (s *Store) LoadFromData(data []byte) error { + manifest, err := ParseManifest(data) + if err != nil { + return err + } + s.Load(manifest) + return nil +} + +func validateManifest(m *Manifest) error { + if m.Version != 1 { + return fmt.Errorf("unsupported manifest version %d, expected 1", m.Version) + } + if m.Revision == "" { + return fmt.Errorf("manifest revision is required") + } + if m.Operations == nil { + return fmt.Errorf("manifest operations field is required") + } + return nil +} + +// IsLoaded returns whether a manifest has been loaded. +func (s *Store) IsLoaded() bool { + return s.manifest.Load() != nil +} + +// Revision returns the current manifest revision for polling. +func (s *Store) Revision() string { + m := s.manifest.Load() + if m == nil { + return "" + } + return m.Revision +} + +// OperationCount returns the number of operations in the manifest. +func (s *Store) OperationCount() int { + m := s.manifest.Load() + if m == nil { + return 0 + } + return len(m.Operations) +} + +// AllOperations returns all operations from the manifest for iteration (e.g., warmup). +// Returns nil if no manifest is loaded. 
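+// The returned map is the store's internal map and must not be mutated by callers.
+// Illustrative iteration (warmup is a placeholder for the caller's own processing):
+//
+//	for sha256Hash, operationBody := range store.AllOperations() {
+//		warmup(sha256Hash, operationBody)
+//	}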
+func (s *Store) AllOperations() map[string]string { + m := s.manifest.Load() + if m == nil { + return nil + } + return m.Operations +} diff --git a/router/internal/persistedoperation/pqlmanifest/store_test.go b/router/internal/persistedoperation/pqlmanifest/store_test.go new file mode 100644 index 0000000000..2e1c20c727 --- /dev/null +++ b/router/internal/persistedoperation/pqlmanifest/store_test.go @@ -0,0 +1,84 @@ +package pqlmanifest + +import ( + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.uber.org/zap" +) + +func TestStore(t *testing.T) { + t.Run("Load and LookupByHash", func(t *testing.T) { + store := NewStore(zap.NewNop()) + + store.Load(&Manifest{ + Version: 1, + Revision: "rev-1", + Operations: map[string]string{"abc": "query { a }"}, + }) + + body, found := store.LookupByHash("abc") + require.True(t, found) + require.Equal(t, "query { a }", string(body)) + require.Equal(t, "rev-1", store.Revision()) + }) + + t.Run("Revision changes on Load", func(t *testing.T) { + store := NewStore(zap.NewNop()) + + store.Load(&Manifest{Version: 1, Revision: "rev-1", Operations: map[string]string{"abc": "query { a }"}}) + require.Equal(t, "rev-1", store.Revision()) + + store.Load(&Manifest{Version: 1, Revision: "rev-2", Operations: map[string]string{"def": "query { b }"}}) + require.Equal(t, "rev-2", store.Revision()) + + // Old operation gone, new one present + _, found := store.LookupByHash("abc") + require.False(t, found) + body, found := store.LookupByHash("def") + require.True(t, found) + require.Equal(t, "query { b }", string(body)) + }) + + t.Run("AllOperations returns nil when not loaded", func(t *testing.T) { + store := NewStore(zap.NewNop()) + require.Nil(t, store.AllOperations()) + }) + + t.Run("AllOperations returns all operations", func(t *testing.T) { + store := NewStore(zap.NewNop()) + ops := map[string]string{ + "hash1": "query { a }", + "hash2": "query { b }", + "hash3": "mutation { c }", + } + store.Load(&Manifest{Version: 1, Revision: "rev-1", Operations: ops}) + + result := store.AllOperations() + require.Equal(t, ops, result) + }) + + t.Run("SetOnUpdate callback is invoked on Load", func(t *testing.T) { + store := NewStore(zap.NewNop()) + + var called atomic.Bool + store.SetOnUpdate(func() { + called.Store(true) + }) + + store.Load(&Manifest{Version: 1, Revision: "rev-1", Operations: map[string]string{"a": "query { a }"}}) + + // Callback runs in a goroutine, wait briefly + require.Eventually(t, func() bool { + return called.Load() + }, time.Second, 10*time.Millisecond) + }) + + t.Run("SetOnUpdate not called when no callback set", func(t *testing.T) { + store := NewStore(zap.NewNop()) + // Should not panic + store.Load(&Manifest{Version: 1, Revision: "rev-1", Operations: map[string]string{"a": "query { a }"}}) + }) +} diff --git a/router/pkg/authentication/oidc_discovery_client.go b/router/pkg/authentication/oidc_discovery_client.go index 75e403d4bf..9bbfc924ad 100644 --- a/router/pkg/authentication/oidc_discovery_client.go +++ b/router/pkg/authentication/oidc_discovery_client.go @@ -2,6 +2,7 @@ package authentication import ( "encoding/json" + "io" "net/http" "strings" ) @@ -45,6 +46,7 @@ func (c *oidcDiscoveryClient) RoundTrip(req *http.Request) (*http.Response, erro } defer func() { + _, _ = io.Copy(io.Discard, resp.Body) _ = resp.Body.Close() }() diff --git a/router/pkg/config/config.go b/router/pkg/config/config.go index f9715bfe97..3b684b4523 100644 --- a/router/pkg/config/config.go +++ b/router/pkg/config/config.go @@ -972,12 
+972,27 @@ type AutomaticPersistedQueriesCacheConfig struct { TTL int `yaml:"ttl" env:"APQ_CACHE_TTL" envDefault:"-1"` } +type PQLManifestWarmupConfig struct { + Enabled bool `yaml:"enabled" envDefault:"true" env:"ENABLED"` + Workers int `yaml:"workers" envDefault:"4" env:"WORKERS"` + ItemsPerSecond int `yaml:"items_per_second" envDefault:"50" env:"ITEMS_PER_SECOND"` + Timeout time.Duration `yaml:"timeout" envDefault:"30s" env:"TIMEOUT"` +} + +type PQLManifestConfig struct { + Enabled bool `yaml:"enabled" envDefault:"false" env:"ENABLED"` + PollInterval time.Duration `yaml:"poll_interval" envDefault:"10s" env:"POLL_INTERVAL"` + PollJitter time.Duration `yaml:"poll_jitter" envDefault:"5s" env:"POLL_JITTER"` + Warmup PQLManifestWarmupConfig `yaml:"warmup" envPrefix:"WARMUP_"` +} + type PersistedOperationsConfig struct { Disabled bool `yaml:"disabled" env:"DISABLED" envDefault:"false"` LogUnknown bool `yaml:"log_unknown" env:"LOG_UNKNOWN" envDefault:"false"` Safelist SafelistConfiguration `yaml:"safelist" envPrefix:"SAFELIST_"` Cache PersistedOperationsCacheConfig `yaml:"cache"` Storage PersistedOperationsStorageConfig `yaml:"storage"` + Manifest PQLManifestConfig `yaml:"manifest" envPrefix:"MANIFEST_"` } type SafelistConfiguration struct { @@ -1165,7 +1180,7 @@ type Config struct { Modules map[string]interface{} `yaml:"modules,omitempty"` Headers HeaderRules `yaml:"headers,omitempty"` - TrafficShaping TrafficShapingRules `yaml:"traffic_shaping,omitempty"` + TrafficShaping TrafficShapingRules `yaml:"traffic_shaping,omitempty" envPrefix:"TRAFFIC_SHAPING_"` FileUpload FileUpload `yaml:"file_upload,omitempty"` AccessLogs AccessLogsConfig `yaml:"access_logs,omitempty"` Batching BatchingConfig `yaml:"batching,omitempty"` diff --git a/router/pkg/config/config.schema.json b/router/pkg/config/config.schema.json index c0b6b79350..5d3f13c12f 100644 --- a/router/pkg/config/config.schema.json +++ b/router/pkg/config/config.schema.json @@ -6,9 +6,7 @@ "version": { "type": "string", "description": "The version of the configuration file. This is used to ensure that the configuration file is compatible.", - "enum": [ - "1" - ] + "enum": ["1"] }, "instance_id": { "type": "string", @@ -39,10 +37,7 @@ "type": "array", "items": { "type": "object", - "required": [ - "id", - "url" - ], + "required": ["id", "url"], "additionalProperties": false, "properties": { "id": { @@ -61,10 +56,7 @@ "type": "array", "items": { "type": "object", - "required": [ - "id", - "urls" - ], + "required": ["id", "urls"], "additionalProperties": false, "properties": { "id": { @@ -91,11 +83,7 @@ "description": "The configuration for the S3 storage provider. If no access key and secret key are provided, the provider will attempt to retrieve IAM credentials from the EC2 service.", "items": { "type": "object", - "required": [ - "id", - "bucket", - "endpoint" - ], + "required": ["id", "bucket", "endpoint"], "additionalProperties": false, "properties": { "id": { @@ -134,10 +122,7 @@ "description": "The file system configuration. The file system provider is used to store and retrieve data from the local file system.", "items": { "type": "object", - "required": [ - "id", - "path" - ], + "required": ["id", "path"], "additionalProperties": false, "properties": { "id": { @@ -196,10 +181,7 @@ }, "storage": { "description": "The storage provider for persisted operation. Only one provider can be active. 
When no provider is specified, the router will fallback to the Cosmo CDN provider to download the persisted operations.", - "required": [ - "provider_id", - "object_prefix" - ], + "required": ["provider_id"], "properties": { "provider_id": { "description": "The ID of the storage provider. The ID must match the ID of the storage provider in the storage_providers section.", @@ -210,6 +192,69 @@ "description": "The prefix of the object in the storage provider location. The prefix is put in front of the operation SHA256 hash. //.json" } } + }, + "manifest": { + "type": "object", + "additionalProperties": false, + "description": "The configuration for the PQL manifest. When enabled, the router downloads the full persisted operations manifest from the CDN and serves operations from memory.", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable the PQL manifest feature.", + "default": false + }, + "poll_interval": { + "type": "string", + "format": "go-duration", + "description": "The interval at which the router polls the CDN for manifest updates. The period is specified as a string with a number and a unit, e.g. 10s, 1m, 1h. Minimum is 10s.", + "default": "10s", + "duration": { + "minimum": "10s" + } + }, + "poll_jitter": { + "type": "string", + "format": "go-duration", + "description": "The maximum random jitter added to each poll interval. The period is specified as a string with a number and a unit, e.g. 1s, 5s, 10s. Minimum is 1s.", + "default": "5s", + "duration": { + "minimum": "1s" + } + }, + "warmup": { + "type": "object", + "additionalProperties": false, + "description": "Configuration for cache warmup of PQL manifest operations. When enabled, the router pre-processes all operations from the manifest on startup and after manifest updates.", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable cache warmup for PQL manifest operations.", + "default": true + }, + "workers": { + "type": "integer", + "description": "The number of concurrent workers used to warm up the cache.", + "default": 4, + "minimum": 1 + }, + "items_per_second": { + "type": "integer", + "description": "Rate limit for items processed per second. Set to 0 for unlimited throughput.", + "default": 50, + "minimum": 0 + }, + "timeout": { + "type": "string", + "format": "go-duration", + "description": "The maximum time allowed for the warmup to complete. The period is specified as a string with a number and a unit, e.g. 10s, 1m, 5m.", + "default": "30s", + "duration": { + "minimum": "1s" + } + } + } + } + } } } }, @@ -217,9 +262,7 @@ "type": "object", "additionalProperties": false, "description": "The configuration for the automatic persisted queries (APQ).", - "required": [ - "enabled" - ], + "required": ["enabled"], "properties": { "enabled": { "type": "boolean", @@ -248,10 +291,7 @@ }, "storage": { "description": "The storage provider for automatic persisted operation. Only one provider can be active. When no provider is specified, the router will use a local in-memory cache for retaining APQ queries", - "required": [ - "provider_id", - "object_prefix" - ], + "required": ["provider_id", "object_prefix"], "properties": { "provider_id": { "description": "The ID of the storage provider. The ID must match the ID of the storage provider in the storage_providers section.", @@ -277,9 +317,7 @@ "type": "object", "description": "The configuration for the execution config file. The config file is used to load the execution config from the local file system. 
The file has precedence over the storage provider.", "additionalProperties": false, - "required": [ - "path" - ], + "required": ["path"], "dependentSchemas": { "watch_interval": { "properties": { @@ -318,10 +356,7 @@ "properties": { "storage": { "description": "The storage provider for the execution config. Only one provider can be active. When no provider is specified, the router will fallback to the Cosmo CDN provider to download the execution config. Updating the execution config is happening in the background without downtime.", - "required": [ - "provider_id", - "object_path" - ], + "required": ["provider_id", "object_path"], "properties": { "provider_id": { "description": "The ID of the storage provider. The ID must match the ID of the storage provider in the storage_providers section.", @@ -341,9 +376,7 @@ "properties": { "fallback_storage": { "description": "The fallback storage provider for the execution config in case the primary one fails.", - "required": [ - "enabled" - ], + "required": ["enabled"], "properties": { "enabled": { "type": "boolean", @@ -411,9 +444,7 @@ "type": "object", "description": "The configuration for the client authentication. The client authentication is used to authenticate the clients using the provided certificate.", "additionalProperties": false, - "required": [ - "cert_file" - ], + "required": ["cert_file"], "properties": { "required": { "type": "boolean", @@ -436,10 +467,7 @@ } }, "then": { - "required": [ - "cert_file", - "key_file" - ] + "required": ["cert_file", "key_file"] } }, "client": { @@ -554,9 +582,7 @@ "allow_list": { "type": "array", "description": "The names of the headers to forward. The default value is 'Authorization'.", - "default": [ - "Authorization" - ], + "default": ["Authorization"], "items": { "type": "string" } @@ -575,9 +601,7 @@ "allow_list": { "type": "array", "description": "The names of the query parameters to forward. The default value is 'Authorization'.", - "default": [ - "Authorization" - ], + "default": ["Authorization"], "items": { "type": "string" } @@ -692,10 +716,7 @@ "type": "string", "default": "redact", "description": "The method used to anonymize the IP addresses. The supported methods are 'redact' and 'hash'. The default value is 'redact'. The 'redact' method replaces the IP addresses with the string '[REDACTED]'. The 'hash' method hashes the IP addresses using the SHA-256 algorithm.", - "enum": [ - "redact", - "hash" - ] + "enum": ["redact", "hash"] } } } @@ -740,14 +761,7 @@ }, "level": { "type": "string", - "enum": [ - "debug", - "info", - "warn", - "error", - "panic", - "fatal" - ], + "enum": ["debug", "info", "warn", "error", "panic", "fatal"], "description": "The log level for access logs. The log level is used to control the verbosity of the access logs. The default value is 'info'.", "default": "info" }, @@ -830,9 +844,7 @@ "ignore_query_params_list": { "type": "array", "description": "List of query params to be ignored from being logged in the query field.", - "default": [ - "variables" - ], + "default": ["variables"], "items": { "type": "string" } @@ -905,10 +917,7 @@ "items": { "type": "object", "additionalProperties": false, - "required": [ - "key", - "value" - ], + "required": ["key", "value"], "properties": { "key": { "type": "string", @@ -928,9 +937,7 @@ "type": "object", "description": "The configuration for custom attributes. Custom attributes can be created from request headers or static values. 
Keep in mind, that every new custom attribute increases the cardinality of the pipeline.", "additionalProperties": false, - "required": [ - "key" - ], + "required": ["key"], "properties": { "key": { "type": "string", @@ -956,14 +963,10 @@ }, "oneOf": [ { - "required": [ - "request_header" - ] + "required": ["request_header"] }, { - "required": [ - "expression" - ] + "required": ["expression"] } ] } @@ -1010,9 +1013,7 @@ "description": "The exporters to use to export the traces. If no exporters are specified, the default Cosmo Cloud exporter is used. If you override, please make sure to include the default exporter.", "items": { "type": "object", - "required": [ - "endpoint" - ], + "required": ["endpoint"], "additionalProperties": false, "properties": { "disabled": { @@ -1022,10 +1023,7 @@ "type": "string", "description": "The exporter to use for the traces. The supported exporters are 'http' and 'grpc'.", "default": "http", - "enum": [ - "http", - "grpc" - ] + "enum": ["http", "grpc"] }, "endpoint": { "type": "string" @@ -1112,9 +1110,7 @@ "type": "object", "description": "The configuration for custom span attributes for subgraph tracing.", "additionalProperties": false, - "required": [ - "key" - ], + "required": ["key"], "properties": { "key": { "type": "string", @@ -1132,9 +1128,7 @@ }, "oneOf": [ { - "required": [ - "expression" - ] + "required": ["expression"] } ] } @@ -1178,9 +1172,7 @@ "type": "object", "description": "The configuration for custom attributes. Custom attributes can be created from request headers, static values or context fields. Not every context fields are available at all request life-cycle stages. If a value is a list, the value is JSON encoded for OTLP. For Prometheus, the values are exploded into multiple metrics with unique labels. Keep in mind, that every new custom attribute increases the cardinality.", "additionalProperties": false, - "required": [ - "key" - ], + "required": ["key"], "properties": { "key": { "type": "string", @@ -1294,10 +1286,7 @@ "type": "string", "description": "The exporter protocol to use to export metrics. The supported exporters are 'http' and 'grpc'.", "default": "http", - "enum": [ - "http", - "grpc" - ] + "enum": ["http", "grpc"] }, "endpoint": { "type": "string", @@ -1319,16 +1308,10 @@ "temporality": { "type": "string", "description": "Temporality defines the window that an aggregation is calculated over.", - "enum": [ - "delta", - "cumulative" - ] + "enum": ["delta", "cumulative"] } }, - "required": [ - "exporter", - "endpoint" - ] + "required": ["exporter", "endpoint"] } }, "exclude_metrics": { @@ -1532,32 +1515,18 @@ "allow_origins": { "type": "array", "description": "The allowed origins. The default value is to allow all origins. The value can be a list of origins or the wildcard '*'.", - "default": [ - "*" - ], + "default": ["*"], "items": { "type": "string" } }, "allow_methods": { "type": "array", - "default": [ - "GET", - "POST", - "HEAD" - ], + "default": ["GET", "POST", "HEAD"], "description": "The allowed HTTP methods. 
The default value is to allow the methods 'GET', 'POST', and 'HEAD'.", "items": { "type": "string", - "enum": [ - "GET", - "POST", - "HEAD", - "PUT", - "DELETE", - "PATCH", - "OPTIONS" - ] + "enum": ["GET", "POST", "HEAD", "PUT", "DELETE", "PATCH", "OPTIONS"] } }, "allow_headers": { @@ -1687,14 +1656,7 @@ }, "log_level": { "type": "string", - "enum": [ - "debug", - "info", - "warn", - "error", - "panic", - "fatal" - ], + "enum": ["debug", "info", "warn", "error", "panic", "fatal"], "description": "The log level. The log level is used to control the verbosity of the logs. The default value is 'info'.", "default": "info" }, @@ -1945,23 +1907,17 @@ "enabled": { "type": "boolean", "description": "Determines whether cache control policy is enabled.", - "examples": [ - true - ] + "examples": [true] }, "value": { "type": "string", "description": "Global cache control value.", - "examples": [ - "max-age=180, public" - ] + "examples": ["max-age=180, public"] }, "subgraphs": { "type": "array", "description": "Subgraph-specific cache control settings.", - "required": [ - "name" - ], + "required": ["name"], "additionalProperties": false, "items": { "type": "object", @@ -1969,24 +1925,18 @@ "name": { "type": "string", "description": "Name of the subgraph.", - "examples": [ - "products" - ] + "examples": ["products"] }, "value": { "type": "string", "description": "Cache control value for the subgraph.", - "examples": [ - "max-age=60, public" - ] + "examples": ["max-age=60, public"] } } } } }, - "required": [ - "enabled" - ], + "required": ["enabled"], "additionalProperties": false }, "modules": { @@ -2033,11 +1983,7 @@ "symmetric_algorithm": { "type": "string", "description": "The symmetric algorithm used", - "enum": [ - "HS256", - "HS384", - "HS512" - ] + "enum": ["HS256", "HS384", "HS512"] }, "header_key_id": { "type": "string", @@ -2049,11 +1995,7 @@ "default": ["sig"], "items": { "type": "string", - "enum": [ - "sig", - "enc", - "" - ] + "enum": ["sig", "enc", ""] } }, "algorithms": { @@ -2123,56 +2065,36 @@ }, "oneOf": [ { - "required": [ - "url" - ], + "required": ["url"], "not": { "anyOf": [ { - "required": [ - "secret" - ] + "required": ["secret"] }, { - "required": [ - "symmetric_algorithm" - ] + "required": ["symmetric_algorithm"] }, { - "required": [ - "header_key_id" - ] + "required": ["header_key_id"] } ] } }, { - "required": [ - "secret", - "symmetric_algorithm", - "header_key_id" - ], + "required": ["secret", "symmetric_algorithm", "header_key_id"], "not": { "anyOf": [ { - "required": [ - "url" - ] + "required": ["url"] }, { - "required": [ - "algorithms" - ] + "required": ["algorithms"] }, { - "required": [ - "refresh_interval" - ] + "required": ["refresh_interval"] }, { - "required": [ - "refresh_unknown_kid" - ] + "required": ["refresh_unknown_kid"] } ] } @@ -2200,17 +2122,13 @@ "type": { "type": "string", "description": "The type of the source. The only currently supported type is 'header'.", - "enum": [ - "header" - ] + "enum": ["header"] }, "name": { "type": "string", "description": "The name of the header. The header is used to extract the token from the request.", "format": "http-header", - "examples": [ - "X-Authorization" - ] + "examples": ["X-Authorization"] }, "value_prefixes": { "type": "array", @@ -2220,10 +2138,7 @@ } } }, - "required": [ - "type", - "name" - ] + "required": ["type", "name"] } } } @@ -2259,9 +2174,7 @@ }, "strategy": { "type": "string", - "enum": [ - "simple" - ], + "enum": ["simple"], "description": "The strategy used to enforce the rate limit. 
The supported strategies are 'simple'." }, "simple_strategy": { @@ -2300,18 +2213,12 @@ "description": "Hide the rate limit stats from the response extension. If the value is true, the rate limit stats are not included in the response extension." } }, - "required": [ - "rate", - "burst", - "period" - ] + "required": ["rate", "burst", "period"] }, "storage": { "type": "object", "additionalProperties": false, - "required": [ - "urls" - ], + "required": ["urls"], "properties": { "cluster_enabled": { "type": "boolean", @@ -2433,9 +2340,7 @@ "description": "The ID of the storage provider to use for loading GraphQL operations. Only storage provider of type 'file_system' are supported. The provider must be configured in the storage_providers section." } }, - "required": [ - "provider_id" - ] + "required": ["provider_id"] }, "session": { "type": "object", @@ -2556,10 +2461,7 @@ "description": "Configuration used by the EDFS provider to connect to the NATS server.", "items": { "type": "object", - "required": [ - "id", - "url" - ], + "required": ["id", "url"], "additionalProperties": false, "properties": { "id": { @@ -2577,9 +2479,7 @@ "oneOf": [ { "type": "object", - "required": [ - "token" - ], + "required": ["token"], "additionalProperties": false, "properties": { "token": { @@ -2596,10 +2496,7 @@ "type": "object", "description": "Userinfo configuration for the NATS provider.", "additionalProperties": false, - "required": [ - "username", - "password" - ], + "required": ["username", "password"], "properties": { "username": { "type": "string", @@ -2629,10 +2526,7 @@ "items": { "type": "object", "additionalProperties": false, - "required": [ - "id", - "brokers" - ], + "required": ["id", "brokers"], "properties": { "id": { "type": "string", @@ -2664,18 +2558,13 @@ { "type": "object", "additionalProperties": false, - "required": [ - "sasl_plain" - ], + "required": ["sasl_plain"], "properties": { "sasl_plain": { "type": "object", "description": "Plain SASL Authentication configuration for the Kafka provider.", "additionalProperties": false, - "required": [ - "username", - "password" - ], + "required": ["username", "password"], "properties": { "username": { "type": "string", @@ -2692,19 +2581,13 @@ { "type": "object", "additionalProperties": false, - "required": [ - "sasl_scram" - ], + "required": ["sasl_scram"], "properties": { "sasl_scram": { "type": "object", "description": "SCRAM SASL Authentication configuration for the Kafka provider.", "additionalProperties": false, - "required": [ - "username", - "password", - "mechanism" - ], + "required": ["username", "password", "mechanism"], "properties": { "username": { "type": "string", @@ -2717,10 +2600,7 @@ "mechanism": { "type": "string", "description": "The mechanism for SCRAM SASL authentication.", - "enum": [ - "SCRAM-SHA-256", - "SCRAM-SHA-512" - ] + "enum": ["SCRAM-SHA-256", "SCRAM-SHA-512"] } } } @@ -2742,10 +2622,7 @@ "items": { "type": "object", "additionalProperties": false, - "required": [ - "id", - "urls" - ], + "required": ["id", "urls"], "properties": { "id": { "type": "string", @@ -3526,10 +3403,7 @@ }, "mode": { "type": "string", - "enum": [ - "wrapped", - "pass-through" - ], + "enum": ["wrapped", "pass-through"], "default": "wrapped", "description": "The mode of error propagation. The supported modes are 'wrapped' (default) and 'pass-through'. The 'wrapped' mode wraps the error in a custom error object to hide internals. The 'pass-through' mode returns the error as is from the Subgraph." 
}, @@ -3553,9 +3427,7 @@ "items": { "type": "string" }, - "default": [ - "code" - ], + "default": ["code"], "description": "The allowed extension fields. The allowed extension fields are used to specify which fields of the Subgraph errors are allowed to be propagated to the client." }, "allow_all_extension_fields": { @@ -3949,9 +3821,7 @@ "algorithm": { "type": "string", "description": "The algorithm used to calculate the retry interval. The supported algorithms are 'backoff_jitter'.", - "enum": [ - "backoff_jitter" - ] + "enum": ["backoff_jitter"] }, "max_attempts": { "type": "integer", @@ -3987,58 +3857,40 @@ "properties": { "op": { "type": "string", - "enum": [ - "propagate" - ], - "examples": [ - "propagate" - ], + "enum": ["propagate"], + "examples": ["propagate"], "description": "The operation to perform on the header. The supported operations are 'propagate'. The 'propagate' operation is used to propagate the header to the subgraphs." }, "matching": { "type": "string", - "examples": [ - "(?i)^X-Custom-.*" - ], + "examples": ["(?i)^X-Custom-.*"], "description": "The matching rule for the header. The matching rule is a regular expression that is used to match the header. Can't be used with 'named'." }, "negate_match": { "type": "boolean", - "examples": [ - "true" - ], + "examples": ["true"], "description": "If set to true, the result of the 'matching' regex will be inverted. This is useful for simulating negative lookahead behavior, which is not natively supported." }, "named": { "type": "string", - "examples": [ - "X-Test-Header" - ], + "examples": ["X-Test-Header"], "description": "The name of the header to match. Use the canonical version e.g. X-Test-Header. Can't be used with 'matching'." }, "rename": { "type": "string", - "examples": [ - "X-Rename-Test-Header" - ], + "examples": ["X-Rename-Test-Header"], "description": "Rename is used to rename the named or the matching headers. It can be used with either the named or the matching." }, "default": { "type": "string", - "examples": [ - "default-value" - ], + "examples": ["default-value"], "description": "The default value of the header in case it is not present in the request." } }, "dependentRequired": { - "negate_match": [ - "matching" - ] + "negate_match": ["matching"] }, - "required": [ - "op" - ] + "required": ["op"] }, "traffic_shaping_header_response_rule": { "type": "object", @@ -4047,71 +3899,46 @@ "properties": { "op": { "type": "string", - "enum": [ - "propagate" - ], - "examples": [ - "propagate" - ], + "enum": ["propagate"], + "examples": ["propagate"], "description": "The operation to perform on the header. The supported operations are 'propagate'. The 'propagate' operation is used to propagate the header to the subgraphs." }, "matching": { "type": "string", - "examples": [ - "(?i)^X-Custom-.*" - ], + "examples": ["(?i)^X-Custom-.*"], "description": "The matching rule for the header. The matching rule is a regular expression that is used to match the header. Can't be used with 'named'." }, "negate_match": { "type": "boolean", - "examples": [ - "true" - ], + "examples": ["true"], "description": "If set to true, the result of the 'matching' regex will be inverted. This is useful for simulating negative lookahead behavior, which is not natively supported." }, "named": { "type": "string", - "examples": [ - "X-Test-Header" - ], + "examples": ["X-Test-Header"], "description": "The name of the header to match. Use the canonical version e.g. X-Test-Header. Can't be used with 'matching'." 
}, "rename": { "type": "string", - "examples": [ - "X-Rename-Test-Header" - ], + "examples": ["X-Rename-Test-Header"], "description": "Rename is used to rename the named or the matching headers. It can be used with either the named or the matching." }, "default": { "type": "string", - "examples": [ - "default-value" - ], + "examples": ["default-value"], "description": "The default value of the header in case it is not present in the request." }, "algorithm": { "type": "string", - "enum": [ - "first_write", - "last_write", - "append" - ], - "examples": [ - "first_write" - ], + "enum": ["first_write", "last_write", "append"], + "examples": ["first_write"], "description": "The algorith, to use when multiple headers are present. The supported operations are '\"first_write\", \"last_write\", and \"append\". The 'first_write' retains the first value of a given header. The 'last_write' retains the last value of a given header. The 'append' appends all values of a given header." } }, "dependentRequired": { - "negate_match": [ - "matching" - ] + "negate_match": ["matching"] }, - "required": [ - "op", - "algorithm" - ] + "required": ["op", "algorithm"] }, "router_response_header_rule": { "type": "object", @@ -4142,16 +3969,12 @@ }, "name": { "type": "string", - "examples": [ - "X-API-Key" - ], + "examples": ["X-API-Key"], "description": "The name of the header to set." }, "value": { "type": "string", - "examples": [ - "My-Secret-Value" - ], + "examples": ["My-Secret-Value"], "description": "The value to set for the header. This can include environment variables." }, "expression": { @@ -4162,39 +3985,26 @@ "type": "object", "description": "DEPRECATED: Use expression instead. The configuration for the value from. The value from is used to extract a value from a request context and propagate it to subgraphs. This is currently only valid in requests", "additionalProperties": false, - "required": [ - "context_field" - ], + "required": ["context_field"], "properties": { "context_field": { "type": "string", "description": "The field name of the context from which to extract the value. The value is only extracted when a context is available otherwise the default value is used.", - "enum": [ - "operation_name" - ] + "enum": ["operation_name"] } } } }, - "required": [ - "op", - "name" - ], + "required": ["op", "name"], "oneOf": [ { - "required": [ - "value" - ] + "required": ["value"] }, { - "required": [ - "expression" - ] + "required": ["expression"] }, { - "required": [ - "value_from" - ] + "required": ["value_from"] } ] }, @@ -4205,9 +4015,7 @@ "type": "object", "description": "The configuration for custom fields. Custom attributes can be created from request headers or context fields. Not every context fields are available at all request life-cycle stages. If a value is a list, the value is JSON encoded for OTLP. For Prometheus, the values are exploded into multiple metrics with unique labels. 
Keep in mind, that every new custom attribute increases the cardinality.", "additionalProperties": false, - "required": [ - "key" - ], + "required": ["key"], "properties": { "key": { "type": "string", @@ -4260,4 +4068,4 @@ } } } -} \ No newline at end of file +} diff --git a/router/pkg/config/config_test.go b/router/pkg/config/config_test.go index 280d05fd9c..d93103fb9e 100644 --- a/router/pkg/config/config_test.go +++ b/router/pkg/config/config_test.go @@ -505,7 +505,7 @@ persisted_operations: require.NoError(t, err, &js) } -func TestInvalidPersistedOperations(t *testing.T) { +func TestPersistedOperationsStorageWithoutObjectPrefix(t *testing.T) { t.Parallel() f := createTempFileFromFixture(t, ` @@ -525,12 +525,9 @@ persisted_operations: size: 100MB storage: provider_id: s3 - # Missing object_prefix `) _, err := LoadConfig([]string{f}) - var js *jsonschema.ValidationError - require.ErrorAs(t, err, &js) - require.Equal(t, "at '/persisted_operations/storage': missing property 'object_prefix'", js.Causes[0].Error()) + require.NoError(t, err) } func TestValidExecutionConfig(t *testing.T) { @@ -1000,6 +997,15 @@ func TestConfigMerging(t *testing.T) { ProviderID: "s3", ObjectPrefix: "ee", }, + Manifest: PQLManifestConfig{ + PollInterval: 10 * time.Second, + PollJitter: 5 * time.Second, + Warmup: PQLManifestWarmupConfig{ + Enabled: true, + Workers: 4, + Timeout: 30 * time.Second, + }, + }, }, AutomaticPersistedQueries: AutomaticPersistedQueriesConfig{ Storage: AutomaticPersistedQueriesStorageConfig{ @@ -1831,3 +1837,108 @@ security: require.NoError(t, err) }) } + +func TestPQLManifestConfig(t *testing.T) { + t.Run("defaults", func(t *testing.T) { + t.Parallel() + + f := createTempFileFromFixture(t, ` +version: "1" + +graph: + token: "token" +`) + cfg, err := LoadConfig([]string{f}) + require.NoError(t, err) + + require.False(t, cfg.Config.PersistedOperationsConfig.Manifest.Enabled) + require.Equal(t, 10*time.Second, cfg.Config.PersistedOperationsConfig.Manifest.PollInterval) + require.Equal(t, 5*time.Second, cfg.Config.PersistedOperationsConfig.Manifest.PollJitter) + }) + + t.Run("yaml config", func(t *testing.T) { + t.Parallel() + + f := createTempFileFromFixture(t, ` +version: "1" + +graph: + token: "token" + +persisted_operations: + manifest: + enabled: true + poll_interval: 60s + poll_jitter: 15s +`) + cfg, err := LoadConfig([]string{f}) + require.NoError(t, err) + + require.True(t, cfg.Config.PersistedOperationsConfig.Manifest.Enabled) + require.Equal(t, 60*time.Second, cfg.Config.PersistedOperationsConfig.Manifest.PollInterval) + require.Equal(t, 15*time.Second, cfg.Config.PersistedOperationsConfig.Manifest.PollJitter) + }) + + t.Run("env variables", func(t *testing.T) { + t.Setenv("PERSISTED_OPERATIONS_MANIFEST_ENABLED", "true") + t.Setenv("PERSISTED_OPERATIONS_MANIFEST_POLL_INTERVAL", "45s") + t.Setenv("PERSISTED_OPERATIONS_MANIFEST_POLL_JITTER", "8s") + + f := createTempFileFromFixture(t, ` +version: "1" + +graph: + token: "token" +`) + cfg, err := LoadConfig([]string{f}) + require.NoError(t, err) + + require.True(t, cfg.Config.PersistedOperationsConfig.Manifest.Enabled) + require.Equal(t, 45*time.Second, cfg.Config.PersistedOperationsConfig.Manifest.PollInterval) + require.Equal(t, 8*time.Second, cfg.Config.PersistedOperationsConfig.Manifest.PollJitter) + }) + + t.Run("poll_interval below minimum rejected", func(t *testing.T) { + t.Parallel() + + f := createTempFileFromFixture(t, ` +version: "1" + +graph: + token: "token" + +persisted_operations: + manifest: + enabled: true + 
poll_interval: 5s +`) + _, err := LoadConfig([]string{f}) + + var js *jsonschema.ValidationError + require.ErrorAs(t, err, &js) + require.Equal(t, []string{"persisted_operations", "manifest", "poll_interval"}, js.Causes[0].InstanceLocation) + require.Equal(t, "at '/persisted_operations/manifest/poll_interval': duration must be greater or equal than 10s", js.Causes[0].Error()) + }) + + t.Run("poll_jitter below minimum rejected", func(t *testing.T) { + t.Parallel() + + f := createTempFileFromFixture(t, ` +version: "1" + +graph: + token: "token" + +persisted_operations: + manifest: + enabled: true + poll_jitter: 500ms +`) + _, err := LoadConfig([]string{f}) + + var js *jsonschema.ValidationError + require.ErrorAs(t, err, &js) + require.Equal(t, []string{"persisted_operations", "manifest", "poll_jitter"}, js.Causes[0].InstanceLocation) + require.Equal(t, "at '/persisted_operations/manifest/poll_jitter': duration must be greater or equal than 1s", js.Causes[0].Error()) + }) +} diff --git a/router/pkg/config/fixtures/full.yaml b/router/pkg/config/fixtures/full.yaml index 3befb85996..b7ce536997 100644 --- a/router/pkg/config/fixtures/full.yaml +++ b/router/pkg/config/fixtures/full.yaml @@ -24,7 +24,7 @@ introspection: enabled: true secret: 'AN_EXAMPLE_PLACEHOLDER_SECRET_ONLY' json_log: true -log_service_name: "my-custom-router" +log_service_name: 'my-custom-router' shutdown_delay: 15s grace_period: 20s poll_interval: 10s @@ -205,7 +205,6 @@ telemetry: schema_usage: enabled: true include_operation_sha: true - sample_rate: 1.0 # Supports any rate: 1.0, 0.8, 0.5, 0.1, 0.01, etc. cache_control_policy: enabled: true @@ -495,6 +494,10 @@ persisted_operations: storage: provider_id: s3 object_prefix: '5ef73d80-cae4-4d0e-98a7-1e9fa922c1a4/92c25b45-a75b-4954-b8f6-6592a9b203eb/operations/foo' + manifest: + enabled: true + poll_interval: 30s + poll_jitter: 10s automatic_persisted_queries: enabled: true diff --git a/router/pkg/config/testdata/config_defaults.json b/router/pkg/config/testdata/config_defaults.json index 817b683454..2c845745b3 100644 --- a/router/pkg/config/testdata/config_defaults.json +++ b/router/pkg/config/testdata/config_defaults.json @@ -542,6 +542,17 @@ "Storage": { "ProviderID": "", "ObjectPrefix": "" + }, + "Manifest": { + "Enabled": false, + "PollInterval": 10000000000, + "PollJitter": 5000000000, + "Warmup": { + "Enabled": true, + "Workers": 4, + "ItemsPerSecond": 50, + "Timeout": 30000000000 + } } }, "AutomaticPersistedQueries": { diff --git a/router/pkg/config/testdata/config_full.json b/router/pkg/config/testdata/config_full.json index bd307ab0c3..d8c5f76db2 100644 --- a/router/pkg/config/testdata/config_full.json +++ b/router/pkg/config/testdata/config_full.json @@ -977,6 +977,17 @@ "Storage": { "ProviderID": "s3", "ObjectPrefix": "5ef73d80-cae4-4d0e-98a7-1e9fa922c1a4/92c25b45-a75b-4954-b8f6-6592a9b203eb/operations/foo" + }, + "Manifest": { + "Enabled": true, + "PollInterval": 30000000000, + "PollJitter": 10000000000, + "Warmup": { + "Enabled": true, + "Workers": 4, + "ItemsPerSecond": 50, + "Timeout": 30000000000 + } } }, "AutomaticPersistedQueries": {