diff --git a/.github/workflows/if-nodejs-pr-testing.yml b/.github/workflows/if-nodejs-pr-testing.yml index 54aee7addd6c..04ae195ff390 100644 --- a/.github/workflows/if-nodejs-pr-testing.yml +++ b/.github/workflows/if-nodejs-pr-testing.yml @@ -174,4 +174,4 @@ jobs: fail_ci_if_error: true files: ./coverage/lcov.info token: ${{ secrets.CODECOV_TOKEN }} - verbose: true + verbose: true \ No newline at end of file diff --git a/scripts/markdown/check-edit-links.ts b/scripts/markdown/check-edit-links.ts index 65682bfc2d9f..aa9c9419d73b 100644 --- a/scripts/markdown/check-edit-links.ts +++ b/scripts/markdown/check-edit-links.ts @@ -32,13 +32,70 @@ interface PathObject { * * @throws {Error} If an error occurs during the HTTP HEAD request for any edit link. */ + +// NEW: Async generator for efficient directory traversal +async function* walkDirectory(dir: string, relativePath = ''): AsyncGenerator<{ filePath: string; relativeFilePath: string }> { + const entries = await fs.readdir(dir, { withFileTypes: true }); + + for (const entry of entries) { + const absPath = path.join(dir, entry.name); + const relPath = path.join(relativePath, entry.name); + + if (entry.isDirectory()) { + yield* walkDirectory(absPath, relPath); + } else if (entry.isFile() && entry.name.endsWith('.md') && entry.name !== '_section.md') { + yield { filePath: absPath, relativeFilePath: relPath }; + } + } +} + +/** + * Recursively traverses a folder and collects all Markdown file paths, + * excluding `_section.md` files. For each Markdown file found, it constructs + * the corresponding URL path and determines the appropriate edit link. + * + * This function uses an async generator (`walkDirectory`) to stream file paths + * instead of loading all of them into memory at once, improving performance + * and memory efficiency in large documentation repositories. + * + * @param folderPath - The absolute path to the root folder to traverse. + * @param editOptions - An array of objects used to determine the correct edit link + * for each markdown file. Each object should have a `value` and `href`. + * @returns A promise that resolves to an array of `PathObject`, each containing: + * - `filePath`: Absolute path to the markdown file. + * - `urlPath`: Relative URL path derived from the file's location. + * - `editLink`: Link to edit the file, based on `editOptions`. + * + * @throws Will throw an error if the directory traversal fails. + */ +async function generatePaths(folderPath: string, editOptions: { value: string; href: string }[]): Promise { + if (typeof folderPath !== 'string' || !folderPath) { + throw new TypeError('The "path" argument must be a non-empty string.'); + } + + const result: PathObject[] = []; + + try { + for await (const { filePath, relativeFilePath } of walkDirectory(folderPath)) { + const urlPath = relativeFilePath.split(path.sep).join('/').replace(/\.md$/, ''); + result.push({filePath,urlPath,editLink: determineEditLink(urlPath, filePath, editOptions)}); + } + + return result; + } catch (err) { + const errorMessage = err instanceof Error ? err.message : String(err); + const errorStack = err instanceof Error ? err.stack : ''; + throw new Error(`Error walking directory ${folderPath}: ${errorMessage}\n${errorStack}`); + } +} + async function processBatch(batch: PathObject[]): Promise<(PathObject | null)[]> { const TIMEOUT_MS = Number(process.env.DOCS_LINK_CHECK_TIMEOUT) || 5000; return Promise.all( batch.map(async ({ filePath, urlPath, editLink }) => { let timeout: NodeJS.Timeout | undefined; - + try { if (!editLink || ignoreFiles.some((ignorePath) => filePath.endsWith(ignorePath))) return null; @@ -134,61 +191,6 @@ function determineEditLink( return target ? `${target.href}/${path.basename(filePath)}` : null; } -/** - * Recursively processes markdown files in a directory to generate path objects with corresponding edit links. - * - * This function reads the contents of the specified folder, skipping files named "_section.md", and recursively traverses subdirectories. - * For each markdown file encountered, it constructs a URL path by replacing system path separators with '/' and removing the file extension, - * then generates an edit link based on the provided edit options. The results are accumulated in an array and returned. - * - * @param folderPath - The folder to process. - * @param editOptions - An array of edit link option objects with `value` and `href` properties. - * @param relativePath - Optional relative path for URL generation (default: ''). - * @param result - Optional accumulator for results (default: an empty array). - * @returns A promise that resolves with an array of objects, each containing a file path, URL path, and edit link. - * @throws {Error} If an error occurs while reading or processing the directory. - */ -async function generatePaths( - folderPath: string, - editOptions: { value: string; href: string }[], - relativePath = '', - result: PathObject[] = [] -): Promise { - try { - const files = await fs.readdir(folderPath); - - await Promise.all( - files.map(async (file) => { - const filePath = path.join(folderPath, file); - const relativeFilePath = path.join(relativePath, file); - - // Skip _section.md files - if (file === '_section.md') { - return; - } - - const stats = await fs.stat(filePath); - - if (stats.isDirectory()) { - await generatePaths(filePath, editOptions, relativeFilePath, result); - } else if (stats.isFile() && file.endsWith('.md')) { - const urlPath = relativeFilePath.split(path.sep).join('/').replace('.md', ''); - - result.push({ - filePath, - urlPath, - editLink: determineEditLink(urlPath, filePath, editOptions) - }); - } - }) - ); - - return result; - } catch (err) { - throw new Error(`Error processing directory ${folderPath}: ${err}`); - } -} - /** * Executes the main workflow for validating edit links in markdown files. * diff --git a/tests/markdown/check-edit-links.test.ts b/tests/markdown/check-edit-links.test.ts index 958cec8563dd..ed5b1500e7ad 100644 --- a/tests/markdown/check-edit-links.test.ts +++ b/tests/markdown/check-edit-links.test.ts @@ -17,9 +17,13 @@ jest.mock('../../scripts/helpers/logger.ts', () => ({ logger: { info: jest.fn() } })); jest.mock('node-fetch-2', () => jest.fn()); +function dirent(name: string, isFile = true, isDirectory = false) { + return { name, isFile: () => isFile, isDirectory: () => isDirectory }; +} describe('URL Checker Tests', () => { const mockFetch = fetch as jest.Mock; + const testDir = path.resolve(__dirname, '../../markdown/docs'); beforeEach(() => { jest.clearAllMocks(); @@ -64,11 +68,23 @@ describe('URL Checker Tests', () => { expect(result).toBe(null); }); + + it('returns fallback link if editOption.value is empty', () => { + const fallbackOption = [{ value: '', href: 'https://github.com/org/repo/edit/main' }]; + expect( + determineEditLink('docs/anything', 'docs/anything.md', fallbackOption), + ).toBe('https://github.com/org/repo/edit/main/docs/docs/anything.md'); + }); + + it('returns correct link for specific match', () => { + const options = [{ value: 'special', href: 'https://github.com/org/repo/edit/main' }]; + expect( + determineEditLink('docs/special', 'docs/special.md', options), + ).toBe('https://github.com/org/repo/edit/main/special.md'); + }); }); describe('generatePaths', () => { - const testDir = path.resolve(__dirname, '../../markdown/docs'); - it('should generate correct paths for markdown files', async () => { const paths = await generatePaths(testDir, editOptions); @@ -89,13 +105,8 @@ describe('URL Checker Tests', () => { it('should skip non-markdown files', async () => { // Create a mock implementation to test the else branch - const mockReaddir = jest.spyOn(fs, 'readdir') as jest.Mock; - const mockStat = jest.spyOn(fs, 'stat') as jest.Mock; - - mockReaddir.mockImplementationOnce(() => Promise.resolve(['test.js', 'test.md'])); - mockStat.mockImplementationOnce(() => Promise.resolve({ isDirectory: () => false, isFile: () => true })); - mockStat.mockImplementationOnce(() => Promise.resolve({ isDirectory: () => false, isFile: () => true })); - + const mockReaddir = jest.spyOn(fs, 'readdir').mockImplementation(async (dir, opts) => [dirent('test.js', true, false), dirent('test.md', true, false)]); + const result = await generatePaths(testDir, editOptions); // Only the markdown file should be included, not the js file @@ -103,7 +114,6 @@ describe('URL Checker Tests', () => { expect(result[0].filePath.endsWith('.md')).toBe(true); mockReaddir.mockRestore(); - mockStat.mockRestore(); }); it('should handle errors gracefully', async () => { @@ -111,6 +121,27 @@ describe('URL Checker Tests', () => { await expect(generatePaths(invalidDir, editOptions)).rejects.toThrow(); }); + + + it('throws TypeError for invalid folderPath', async () => { + // @ts-expect-error + await expect(generatePaths(undefined, editOptions)).rejects.toThrow(TypeError); + await expect(generatePaths('', editOptions)).rejects.toThrow(TypeError); + }); + + it('throws error if readdir fails', async () => { + jest.spyOn(fs, 'readdir').mockImplementationOnce(() => {throw new Error('FS error')}); + await expect(generatePaths(testDir, editOptions)).rejects.toThrow('FS error'); + }); + + it('handles subdirectory traversal', async () => { + jest.spyOn(fs, 'readdir').mockImplementationOnce(async () => [dirent('subdir', false, true),dirent('main.md', true, false)]) + .mockImplementationOnce(async () => [dirent('subfile.md', true, false)]); + const result = await generatePaths(testDir, editOptions); + + expect(result.some((f) => f.filePath.endsWith('main.md'))).toBe(true); + expect(result.some((f) => f.filePath.endsWith('subfile.md'))).toBe(true); + }); }); describe('processBatch', () => { @@ -163,8 +194,32 @@ describe('URL Checker Tests', () => { ); await expect(processBatch(testBatch)).rejects.toThrow(); }, 20000); + + const batch = [ + {filePath: 'file1.md',urlPath: 'docs/file1',editLink: 'https://github.com/org/repo/edit/main/file1.md'}, + {filePath: 'reference/specification/v2.x.md',urlPath: 'docs/reference/specification/v2.x',editLink: 'https://github.com/org/repo/edit/main/v2.x.md'}, + { filePath: 'file2.md', urlPath: 'docs/file2', editLink: null } // no editLink + ]; + + it('skips files with no editLink or in ignoreFiles', async () => { + mockFetch.mockImplementation(() => Promise.resolve({ status: 200 })); + const result = await processBatch(batch); + expect(result).toEqual([null, null, null]); + }); + + it('returns file if editLink is 404', async () => { + mockFetch.mockImplementation(() => Promise.resolve({ status: 404 })); + const result = await processBatch([{filePath: 'file.md', urlPath: 'docs/file',editLink: 'https://github.com/org/repo/edit/main/file.md'}]); + expect(result[0]?.editLink).toContain('file.md'); + }); + + it('rejects on network error', async () => { + mockFetch.mockImplementation(() =>Promise.reject(new Error('Network error'))); + await expect(processBatch([{filePath: 'file.md',urlPath: 'docs/file',editLink: 'https://github.com/org/repo/edit/main/file.md'}])).rejects.toThrow('Network error'); + }); }); + // ----------- checkUrls tests ----------- describe('checkUrls', () => { it('should process all URLs in batches', async () => { mockFetch.mockImplementation(() => Promise.resolve({ status: 200 })); @@ -184,6 +239,14 @@ describe('URL Checker Tests', () => { expect(results.length).toBe(2); }); + + it('returns only 404s from batch', async () => { + mockFetch.mockImplementation((url) =>Promise.resolve({ status: url.includes('bad') ? 404 : 200 })); + const paths = [{filePath: 'good.md',urlPath: 'docs/good',editLink: 'https://github.com/org/repo/edit/main/good.md'},{filePath: 'bad.md',urlPath: 'docs/bad',editLink: 'https://github.com/org/repo/edit/main/bad.md'}]; + const result = await checkUrls(paths); + expect(result.length).toBe(1); + expect(result[0].filePath).toBe('bad.md'); + }); }); describe('main', () => {