From d7b6ad074e29fdd28dfa3b1485d354d257c4aa46 Mon Sep 17 00:00:00 2001 From: David Michon Date: Fri, 20 Sep 2024 16:22:34 -0700 Subject: [PATCH] [package-deps-hash] Expose `hashFilesAsync` API (#4934) * [package-deps-hash] Expose `hashFilesAsync` API * Update comments * rush change * Expose from index --------- Co-authored-by: David Michon --- .../hash-files-async_2024-09-20-22-45.json | 10 +++ common/reviews/api/package-deps-hash.api.md | 3 + .../package-deps-hash/src/getRepoState.ts | 81 ++++++++++++++----- libraries/package-deps-hash/src/index.ts | 3 +- 4 files changed, 77 insertions(+), 20 deletions(-) create mode 100644 common/changes/@rushstack/package-deps-hash/hash-files-async_2024-09-20-22-45.json diff --git a/common/changes/@rushstack/package-deps-hash/hash-files-async_2024-09-20-22-45.json b/common/changes/@rushstack/package-deps-hash/hash-files-async_2024-09-20-22-45.json new file mode 100644 index 00000000000..520e1c22dfa --- /dev/null +++ b/common/changes/@rushstack/package-deps-hash/hash-files-async_2024-09-20-22-45.json @@ -0,0 +1,10 @@ +{ + "changes": [ + { + "packageName": "@rushstack/package-deps-hash", + "comment": "Expose `hashFilesAsync` API. 
This serves a similar role as `getGitHashForFiles` but is asynchronous and allows for the file names to be provided as an async iterable.", + "type": "minor" + } + ], + "packageName": "@rushstack/package-deps-hash" +} \ No newline at end of file diff --git a/common/reviews/api/package-deps-hash.api.md b/common/reviews/api/package-deps-hash.api.md index def3204392e..1a7df0094e4 100644 --- a/common/reviews/api/package-deps-hash.api.md +++ b/common/reviews/api/package-deps-hash.api.md @@ -22,6 +22,9 @@ export function getRepoRoot(currentWorkingDirectory: string, gitPath?: string): // @beta export function getRepoStateAsync(rootDirectory: string, additionalRelativePathsToHash?: string[], gitPath?: string): Promise<Map<string, string>>; +// @beta +export function hashFilesAsync(rootDirectory: string, filesToHash: Iterable<string> | AsyncIterable<string>, gitPath?: string): Promise<Iterable<[string, string]>>; + // @beta export interface IFileDiffStatus { // (undocumented) diff --git a/libraries/package-deps-hash/src/getRepoState.ts b/libraries/package-deps-hash/src/getRepoState.ts index c5eda993b42..c8992ac275d 100644 --- a/libraries/package-deps-hash/src/getRepoState.ts +++ b/libraries/package-deps-hash/src/getRepoState.ts @@ -298,6 +298,61 @@ async function spawnGitAsync( return stdout; } +function isIterable<T>(value: Iterable<T> | AsyncIterable<T>): value is Iterable<T> { + return Symbol.iterator in value; +} + +/** + * Uses `git hash-object` to hash the provided files. Unlike `getGitHashForFiles`, this API is asynchronous, and also allows for + * the input file paths to be specified as an async iterable. + * + * @param rootDirectory - The root directory to which paths are specified relative. Must be the root of the Git repository. + * @param filesToHash - The file paths to hash using `git hash-object` + * @param gitPath - The path to the Git executable + * @returns An iterable of [filePath, hash] pairs + * + * @remarks + * The input file paths must be specified relative to the Git repository root, or else be absolute paths. 
+ * @beta + */ +export async function hashFilesAsync( + rootDirectory: string, + filesToHash: Iterable<string> | AsyncIterable<string>, + gitPath?: string +): Promise<Iterable<[string, string]>> { + const hashPaths: string[] = []; + + const input: Readable = Readable.from( + isIterable(filesToHash) + ? (function* (): IterableIterator<string> { + for (const file of filesToHash) { + hashPaths.push(file); + yield `${file}\n`; + } + })() + : (async function* (): AsyncIterableIterator<string> { + for await (const file of filesToHash) { + hashPaths.push(file); + yield `${file}\n`; + } + })(), + { + encoding: 'utf-8', + objectMode: false, + autoDestroy: true + } + ); + + const hashObjectResult: string = await spawnGitAsync( + gitPath, + STANDARD_GIT_OPTIONS.concat(['hash-object', '--stdin-paths']), + rootDirectory, + input + ); + + return parseGitHashObject(hashObjectResult, hashPaths); +} + /** * Gets the object hashes for all files in the Git repo, combining the current commit with working tree state. * Uses async operations and runs all primary Git calls in parallel. 
@@ -346,12 +401,10 @@ export async function getRepoStateAsync( rootDirectory ).then(parseGitStatus); - const hashPaths: string[] = []; async function* getFilesToHash(): AsyncIterableIterator<string> { if (additionalRelativePathsToHash) { for (const file of additionalRelativePathsToHash) { - hashPaths.push(file); - yield `${file}\n`; + yield file; } } @@ -359,33 +412,23 @@ export async function getRepoStateAsync( for (const [filePath, exists] of locallyModified) { if (exists) { - hashPaths.push(filePath); - yield `${filePath}\n`; + yield filePath; } else { files.delete(filePath); } } } - const hashObjectPromise: Promise<string> = spawnGitAsync( - gitPath, - STANDARD_GIT_OPTIONS.concat(['hash-object', '--stdin-paths']), + const hashObjectPromise: Promise<Iterable<[string, string]>> = hashFilesAsync( rootDirectory, - Readable.from(getFilesToHash(), { - encoding: 'utf-8', - objectMode: false, - autoDestroy: true - }) + getFilesToHash(), + gitPath ); - const [{ files, submodules }, hashObject] = await Promise.all([ - statePromise, - hashObjectPromise, - locallyModifiedPromise - ]); + const [{ files, submodules }] = await Promise.all([statePromise, locallyModifiedPromise]); // The result of "git hash-object" will be a list of file hashes delimited by newlines - for (const [filePath, hash] of parseGitHashObject(hashObject, hashPaths)) { + for (const [filePath, hash] of await hashObjectPromise) { files.set(filePath, hash); } diff --git a/libraries/package-deps-hash/src/index.ts b/libraries/package-deps-hash/src/index.ts index 26cb6d96aa2..c6668002dbe 100644 --- a/libraries/package-deps-hash/src/index.ts +++ b/libraries/package-deps-hash/src/index.ts @@ -19,5 +19,6 @@ export { getRepoChanges, getRepoRoot, getRepoStateAsync, - ensureGitMinimumVersion + ensureGitMinimumVersion, + hashFilesAsync } from './getRepoState';