diff --git a/packages/@aws-cdk-testing/framework-integ/test/core/test/tree-metadata.test.ts b/packages/@aws-cdk-testing/framework-integ/test/core/test/tree-metadata.test.ts index 462bbba15ff71..df9438d1d129b 100644 --- a/packages/@aws-cdk-testing/framework-integ/test/core/test/tree-metadata.test.ts +++ b/packages/@aws-cdk-testing/framework-integ/test/core/test/tree-metadata.test.ts @@ -6,6 +6,7 @@ import * as path from 'path'; import { Construct } from 'constructs'; import * as cxschema from 'aws-cdk-lib/cloud-assembly-schema'; import { App, CfnParameter, CfnResource, Lazy, Stack, TreeInspector } from 'aws-cdk-lib'; +import { TreeFile } from 'aws-cdk-lib/core/lib/private/tree-metadata'; abstract class AbstractCfnResource extends CfnResource { constructor(scope: Construct, id: string) { @@ -162,7 +163,9 @@ describe('tree metadata', () => { const treeArtifact = assembly.tree(); expect(treeArtifact).toBeDefined(); - expect(readJson(assembly.directory, treeArtifact!.file)).toEqual({ + const treeJson = readJson(assembly.directory, treeArtifact!.file); + + expect(treeJson).toEqual({ version: 'tree-0.1', tree: expect.objectContaining({ children: expect.objectContaining({ @@ -185,6 +188,91 @@ describe('tree metadata', () => { }); }); + /** + * Check that we can limit ourselves to a given tree file size + * + * We can't try the full 512MB because the test process will run out of memory + * before synthing such a large tree. 
+ */ + test('tree.json can be split over multiple files', () => { + const MAX_NODES = 1_000; + const app = new App({ + context: { + '@aws-cdk/core.TreeMetadata:maxNodes': MAX_NODES, + }, + analyticsReporting: false, + }); + + // GIVEN + const buildStart = Date.now(); + const addedNodes = recurseBuild(app, 4, 4); + // eslint-disable-next-line no-console + console.log('Built tree in', Date.now() - buildStart, 'ms'); + + // WHEN + const synthStart = Date.now(); + const assembly = app.synth(); + // eslint-disable-next-line no-console + console.log('Synthed tree in', Date.now() - synthStart, 'ms'); + try { + const treeArtifact = assembly.tree(); + expect(treeArtifact).toBeDefined(); + + // THEN - does not explode, and file sizes are correctly limited + const sizes: Record = {}; + recurseVisit(assembly.directory, treeArtifact!.file, sizes); + + for (const size of Object.values(sizes)) { + expect(size).toBeLessThanOrEqual(MAX_NODES); + } + + expect(Object.keys(sizes).length).toBeGreaterThan(1); + + const foundNodes = sum(Object.values(sizes)); + expect(foundNodes).toEqual(addedNodes + 2); // App, Tree + } finally { + fs.rmSync(assembly.directory, { force: true, recursive: true }); + } + + function recurseBuild(scope: Construct, n: number, depth: number) { + if (depth === 0) { + const resourceCount = 450; + const stack = new Stack(scope, 'SomeStack'); + for (let i = 0; i < resourceCount; i++) { + new CfnResource(stack, `Resource${i}`, { type: 'Aws::Some::Resource' }); + } + return resourceCount + 3; // Also count Stack, BootstrapVersion, CheckBootstrapVersion + } + + let ret = 0; + for (let i = 0; i < n; i++) { + const parent = new Construct(scope, `Construct${i}`); + ret += 1; + ret += recurseBuild(parent, n, depth - 1); + } + return ret; + } + + function recurseVisit(directory: string, fileName: string, files: Record) { + let nodes = 0; + const treeJson: TreeFile = readJson(directory, fileName); + rec(treeJson.tree); + files[fileName] = nodes; + + function rec(x: 
TreeFile['tree']) { + if (isSubtreeReference(x)) { + // We'll count this node as part of our visit to the "real" node + recurseVisit(directory, x.fileName, files); + } else { + nodes += 1; + for (const child of Object.values(x.children ?? {})) { + rec(child); + } + } + } + } + }); + test('token resolution & cfn parameter', () => { const app = new App(); const stack = new Stack(app, 'mystack'); @@ -396,3 +484,15 @@ describe('tree metadata', () => { function readJson(outdir: string, file: string) { return JSON.parse(fs.readFileSync(path.join(outdir, file), 'utf-8')); } + +function isSubtreeReference(x: TreeFile['tree']): x is Extract { + return !!(x as any).fileName; +} + +function sum(xs: number[]) { + let ret = 0; + for (const x of xs) { + ret += x; + } + return ret; +} diff --git a/packages/aws-cdk-lib/core/lib/private/construct-iteration.ts b/packages/aws-cdk-lib/core/lib/private/construct-iteration.ts new file mode 100644 index 0000000000000..948812eac2284 --- /dev/null +++ b/packages/aws-cdk-lib/core/lib/private/construct-iteration.ts @@ -0,0 +1,22 @@ +import { IConstruct } from 'constructs'; +import { LinkedQueue } from './linked-queue'; + +/** + * Breadth-first iterator over the construct tree + */ +export function* iterateBfs(root: IConstruct) { + // Use a specialized queue data structure. 
Using `Array.shift()` + // has a huge performance penalty (difference on the order of + // ~50ms vs ~1s to iterate a large construct tree) + const queue = new LinkedQueue<{ construct: IConstruct; parent: IConstruct | undefined }>([{ construct: root, parent: undefined }]); + + let next = queue.shift(); + while (next) { + for (const child of next.construct.node.children) { + queue.push({ construct: child, parent: next.construct }); + } + yield next; + + next = queue.shift(); + } +} diff --git a/packages/aws-cdk-lib/core/lib/private/linked-queue.ts b/packages/aws-cdk-lib/core/lib/private/linked-queue.ts new file mode 100644 index 0000000000000..eac5bb9221ec6 --- /dev/null +++ b/packages/aws-cdk-lib/core/lib/private/linked-queue.ts @@ -0,0 +1,45 @@ +/** + * A queue that is faster than an array at large throughput + */ +export class LinkedQueue { + private head?: Node; + private last?: Node; + + constructor(items?: Iterable) { + if (items) { + for (const x of items) { + this.push(x); + } + } + } + + public push(value: A) { + const node: Node = { value }; + if (this.head && this.last) { + this.last.next = node; + this.last = node; + } else { + this.head = node; + this.last = node; + } + } + + public shift(): A | undefined { + if (!this.head) { + return undefined; + } + const ret = this.head.value; + + this.head = this.head.next; + if (!this.head) { + this.last = undefined; + } + + return ret; + } +} + +interface Node { + value: A; + next?: Node; +} diff --git a/packages/aws-cdk-lib/core/lib/private/tree-metadata.ts b/packages/aws-cdk-lib/core/lib/private/tree-metadata.ts index b6628e1a595af..99e8696166aef 100644 --- a/packages/aws-cdk-lib/core/lib/private/tree-metadata.ts +++ b/packages/aws-cdk-lib/core/lib/private/tree-metadata.ts @@ -8,6 +8,7 @@ import { Annotations } from '../annotations'; import { Stack } from '../stack'; import { ISynthesisSession } from '../stack-synthesizers'; import { IInspectable, TreeInspector } from '../tree'; +import { iterateBfs } from 
'./construct-iteration'; const FILE_PATH = 'tree.json'; @@ -15,7 +16,6 @@ const FILE_PATH = 'tree.json'; * Construct that is automatically attached to the top-level `App`. * This generates, as part of synthesis, a file containing the construct tree and the metadata for each node in the tree. * The output is in a tree format so as to preserve the construct hierarchy. - * */ export class TreeMetadata extends Construct { constructor(scope: Construct) { @@ -27,45 +27,33 @@ export class TreeMetadata extends Construct { * @internal */ public _synthesizeTree(session: ISynthesisSession) { - const lookup: { [path: string]: Node } = { }; - - const visit = (construct: IConstruct): Node => { - const children = construct.node.children.map((c) => { - try { - return visit(c); - } catch (e) { - Annotations.of(this).addWarningV2(`@aws-cdk/core:failedToRenderTreeMetadata-${c.node.id}`, `Failed to render tree metadata for node [${c.node.id}]. Reason: ${e}`); - return undefined; - } - }); - const childrenMap = children - .filter((child) => child !== undefined) - .reduce((map, child) => Object.assign(map, { [child!.id]: child }), {}); + // This is for testing + const maxNodesPerTree = this.node.tryGetContext('@aws-cdk/core.TreeMetadata:maxNodes'); + + const builder = session.assembly; + const writer = new FragmentedTreeWriter(builder.outdir, FILE_PATH, { maxNodesPerTree }); + for (const { construct, parent } of iterateBfs(this.node.root)) { const node: Node = { id: construct.node.id || 'App', path: construct.node.path, - children: Object.keys(childrenMap).length === 0 ? undefined : childrenMap, - attributes: this.synthAttributes(construct), constructInfo: constructInfoFromConstruct(construct), }; + try { + node.attributes = this.synthAttributes(construct); + } catch (e) { + Annotations.of(this).addWarningV2(`@aws-cdk/core:failedToRenderTreeMetadata-${construct.node.id}`, `Failed to render tree metadata for node [${construct.node.id}]. 
Reason: ${e}`); + } - lookup[node.path] = node; - - return node; - }; + writer.addNode(construct, parent, node); + } - const tree = { - version: 'tree-0.1', - tree: visit(this.node.root), - }; - const builder = session.assembly; - fs.writeFileSync(path.join(builder.outdir, FILE_PATH), JSON.stringify(tree), { encoding: 'utf-8' }); + const rootFilename = writer.writeForest(); builder.addArtifact('Tree', { type: ArtifactType.CDK_TREE, properties: { - file: FILE_PATH, + file: rootFilename, }, }); } @@ -87,14 +75,271 @@ export class TreeMetadata extends Construct { } } -export interface Node { +/** + * Serializable representation of a construct + */ +interface Node { + /** + * The construct's ID + * + * Even though this ID is already in the `children` map of the containing node, + * we repeat it here. + */ readonly id: string; + + /** + * The construct's path + * + * Even though this path can be constructed from the construct IDs of constructs + * on the root path to this construct, we still repeat it here. + * + * FIXME: In a sizeable file (tested on 136MB) this takes about 20% of the + * total size without adding any value. We should probably remove this at some + * point. 
+ */ readonly path: string; - readonly children?: { [key: string]: Node }; - readonly attributes?: { [key: string]: any }; + children?: { [key: string]: TreeNode }; + attributes?: { [key: string]: unknown }; /** * Information on the construct class that led to this node, if available */ - readonly constructInfo?: ConstructInfo; + constructInfo?: ConstructInfo; } + +export interface TreeFile { + version: 'tree-0.1'; + tree: TreeNode; +} + +type TreeNode = Node | SubTreeReference; + +/** + * A reference to a node that is stored in an entirely different tree.json file + */ +interface SubTreeReference { + readonly id: string; + readonly path: string; + readonly fileName: string; +} + +/** + * Write the Node tree in fragments + * + * We can't write the entire tree file in one go, because it might exceed 512MB serialized, + * which is the largest string size that NodeJS will work with. Anything larger than that will + * just fail. + * + * To write the tree, we will do the following: + * + * - Iterate through the tree in a breadth-first manner, building the serializable version + * of the tree as we go. + * - Once we get to a threshold of N nodes, when we try to add a new child node to the tree + * we will convert the prospective parent to the root of a new tree and replace it + * with a reference in the original tree. + * - Choosing this method instead of making the child a new root because we have to + * assume that all leaf nodes of a "full" tree will still get children added to them, + * and there will be C=(avg outdegree)^(tree depth) of them. Converting the existing + * leaves in-place to a different node type will (probably) minimally change + * the size of the tree, whereas adding C more children that will all become + * references to subtrees will add an unpredictable size to the tree. + * + * Here's a sense of the numbers: a project with 277k nodes leads to a 136M JSON + * file (490 bytes/node). We'll estimate the size of a node to be 1000 bytes. 
+ */ +class FragmentedTreeWriter { + private readonly forest = new Array(); + + /** + * Maps a Construct to its respective Node + */ + private readonly constructMap = new Map(); + + /** + * Map a root Node to its containing Tree + */ + private readonly subtreeRoots = new Map(); + + /** + * Map a Node to its parent Node + */ + private readonly parent = new Map(); + + private readonly maxNodes: number; + + private subtreeCtr = 1; + + constructor(private readonly outdir: string, private readonly rootFilename: string, options?: FragmentedTreeWriterOptions) { + this.maxNodes = options?.maxNodesPerTree ?? 500_000; + } + + /** + * Write the forest to disk, return the root file name + */ + public writeForest(): string { + for (const tree of this.forest) { + const treeFile: TreeFile = { version: 'tree-0.1', tree: tree.root }; + fs.writeFileSync(path.join(this.outdir, tree.filename), JSON.stringify(treeFile), { encoding: 'utf-8' }); + } + + return this.rootFilename; + } + + public addNode(construct: IConstruct, parent: IConstruct | undefined, node: Node) { + // NOTE: we could copy the 'node' object to be safe against tampering, but we trust + // the consuming code so we know we don't need to. + + if (parent === undefined) { + if (this.forest.length > 0) { + throw new Error('Can only add exactly one node without a parent'); + } + + this.addNewTree(node, this.rootFilename); + } else { + // There was a provision in the old code for missing parents, so we're just going to ignore it + // if we can't find a parent. 
+ const parentNode = this.constructMap.get(parent); + if (!parentNode) { + return; + } + + this.addToExistingTree(node, parentNode); + } + + this.constructMap.set(construct, node); + } + + /** + * Add a new tree with the given Node as root + */ + private addNewTree(root: Node, filename: string): Tree { + const tree: Tree = { + root, + filename, + nodes: nodeCount(root), + }; + + this.forest.push(tree); + this.subtreeRoots.set(root, tree); + + return tree; + } + + /** + * Add the given node to an existing tree, potentially splitting it + */ + private addToExistingTree(node: Node, parent: Node) { + let tree = this.treeForNode(parent); + if (this.isTreeFull(tree)) { + // We need to convert the tree to a subtree. Do that by moving the prospective + // parent to a new subtree (might also move its children), and converting the + // parent node in the original tree to a SubTreeReference. + const grandParent = this.parent.get(parent); + if (!grandParent) { + throw new Error(`Could not find parent of ${JSON.stringify(parent)}`); + } + + tree = this.addNewTree(parent, `tree-${this.subtreeCtr++}.json`); + + setChild(grandParent, { + id: parent.id, + path: parent.path, + fileName: tree.filename, + } satisfies SubTreeReference); + + // To be strictly correct we should decrease the original tree's nodeCount here, because + // we may have moved away any number of children as well. We don't do that; the tree + // will remain 'full' and every new node added will lead to a new subtree. 
+ + // Record the new root for this subtree (addNewTree already did this; the repeat is harmless) + this.subtreeRoots.set(parent, tree); + } + + // Add into existing tree + setChild(parent, node); + this.parent.set(node, parent); + tree.nodes += 1; + } + + /** + * Whether the given tree is full + */ + private isTreeFull(t: Tree) { + return t.nodes >= this.maxNodes; + } + + /** + * Return the Tree that contains the given Node + */ + private treeForNode(node: Node): Tree { + const tried = new Array(); + + let cur: Node | undefined = node; + tried.push(cur.path); + let tree = this.subtreeRoots.get(cur); + while (!tree && cur) { + cur = this.parent.get(cur); + tried.push(cur?.path); + tree = cur && this.subtreeRoots.get(cur); + } + if (tree) { + return tree; + } + throw new Error(`Could not find tree for node: ${JSON.stringify(node)}, tried ${tried}, ${Array.from(this.subtreeRoots).map(([k, v]) => `${k.path} => ${v.filename}`)}`); + } +} + +function nodeCount(root: Node) { + let ret = 0; + recurse(root); + return ret; + + function recurse(x: Node) { + ret += 1; + for (const child of Object.values(x.children ?? 
{})) { + recurse(child); + } + } +} + +/** + * Add a child to a parent node + * + * Makes sure the 'children' map exists + */ +function setChild(parent: Node, node: TreeNode) { + if (!parent.children) { + parent.children = {}; + } + parent.children[node.id] = node; +} + +interface FragmentedTreeWriterOptions { + /** + * The maximum number of nodes per tree file + * + * @default 500_000 + */ + readonly maxNodesPerTree?: number; +} + +interface Tree { + /** + * The root of this particular tree + */ + root: Node; + /** + * The filename that `root` will be serialized to + */ + filename: string; + + /** + * How many nodes are in this tree already + */ + nodes: number; +} + +export function isSubtreeReference(x: TreeFile['tree']): x is Extract { + return !!(x as any).fileName; +} + diff --git a/packages/aws-cdk-lib/core/test/private/linked-queue.test.ts b/packages/aws-cdk-lib/core/test/private/linked-queue.test.ts new file mode 100644 index 0000000000000..9ca540626f708 --- /dev/null +++ b/packages/aws-cdk-lib/core/test/private/linked-queue.test.ts @@ -0,0 +1,48 @@ +import * as fc from 'fast-check'; +import { LinkedQueue } from '../../lib/private/linked-queue'; + +type TestQueue = LinkedQueue; + +// The model holds an array, we test that queue and the array behave the same +interface Model { + array: string[]; +} + +class PushCommand implements fc.Command { + constructor(readonly value: string) {} + check() { return true; } + run(m: Model, r: TestQueue): void { + r.push(this.value); + m.array.push(this.value); + } + toString = () => `push(${this.value})`; +} +class ShiftCommand implements fc.Command { + check() { return true; } + run(m: Model, r: TestQueue): void { + const fromQueue = r.shift(); + const fromArray = m.array.shift(); + + expect(fromQueue).toEqual(fromArray); + } + toString = () => 'shift'; +} + +test('LinkedQueue behaves the same as array', () => { + // define the possible commands and their inputs + const allCommands = [ + fc.string().map((v) => new 
PushCommand(v)), + fc.constant(new ShiftCommand()), + ]; + + // run everything + fc.assert( + fc.property(fc.commands(allCommands, { size: '+1' }), (cmds) => { + const s = () => ({ + model: { array: [] } satisfies Model, + real: new LinkedQueue(), + }); + fc.modelRun(s, cmds); + }), + ); +});