diff --git a/package-lock.json b/package-lock.json index 091200ae8..d45ceb268 100644 --- a/package-lock.json +++ b/package-lock.json @@ -18,7 +18,6 @@ "clipboardy": "^5.0.2", "commander": "^14.0.2", "fast-xml-parser": "^5.3.3", - "fflate": "^0.8.2", "git-url-parse": "^16.1.0", "globby": "^16.1.0", "handlebars": "^4.7.8", @@ -31,6 +30,7 @@ "log-update": "^7.0.2", "minimatch": "^10.1.1", "picocolors": "^1.1.1", + "tar": "^7.5.7", "tiktoken": "^1.0.22", "tinypool": "^2.1.0", "web-tree-sitter": "^0.26.3", @@ -818,6 +818,18 @@ "node": "20 || >=22" } }, + "node_modules/@isaacs/fs-minipass": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz", + "integrity": "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==", + "license": "ISC", + "dependencies": { + "minipass": "^7.0.4" + }, + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/@jridgewell/resolve-uri": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", @@ -2210,6 +2222,15 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, + "node_modules/chownr": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz", + "integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==", + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=18" + } + }, "node_modules/cli-cursor": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/cli-cursor/-/cli-cursor-5.0.0.tgz", @@ -2717,12 +2738,6 @@ "reusify": "^1.0.4" } }, - "node_modules/fflate": { - "version": "0.8.2", - "resolved": "https://registry.npmjs.org/fflate/-/fflate-0.8.2.tgz", - "integrity": "sha512-cPJU47OaAoCbg0pBvzsgpTPhmhqI5eJjh/JIu8tPj5q+T7iLvW/JAYUqmE7KOB4R1ZyEhzBaIQpQpardBF5z8A==", - "license": "MIT" - }, "node_modules/figures": { "version": "6.1.0", "resolved": "https://registry.npmjs.org/figures/-/figures-6.1.0.tgz", @@ -3716,11 +3731,22 @@ "version": "7.1.2", "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", - "dev": true, "engines": { "node": ">=16 || 14 >=14.17" } }, + "node_modules/minizlib": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.1.0.tgz", + "integrity": "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==", + "license": "MIT", + "dependencies": { + "minipass": "^7.1.2" + }, + "engines": { + "node": ">= 18" + } + }, "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", @@ -5001,6 +5027,22 @@ "node": ">=8" } }, + "node_modules/tar": { + "version": "7.5.7", + "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.7.tgz", + "integrity": "sha512-fov56fJiRuThVFXD6o6/Q354S7pnWMJIVlDBYijsTNx6jKSE4pvrDTs6lUnmGvNyfJwFQQwWy3owKz1ucIhveQ==", + "license": "BlueOak-1.0.0", + "dependencies": { + "@isaacs/fs-minipass": "^4.0.0", + "chownr": "^3.0.0", + "minipass": "^7.1.2", + "minizlib": "^3.1.0", + "yallist": "^5.0.0" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/terminal-link": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/terminal-link/-/terminal-link-4.0.0.tgz", @@ -5531,6 +5573,15 @@ "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", "license": "ISC" }, + "node_modules/yallist": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz", + "integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==", + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=18" + } + }, "node_modules/yoctocolors": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/yoctocolors/-/yoctocolors-2.1.2.tgz", diff --git a/package.json b/package.json index 994dd0852..e10eca1fb 100644 --- a/package.json +++ b/package.json @@ -84,7 +84,6 @@ "clipboardy": "^5.0.2", "commander": "^14.0.2", "fast-xml-parser": "^5.3.3", - "fflate": "^0.8.2", "git-url-parse": "^16.1.0", "globby": "^16.1.0", "handlebars": "^4.7.8", @@ -97,6 +96,7 @@ "log-update": "^7.0.2", "minimatch": "^10.1.1", "picocolors": "^1.1.1", + "tar": "^7.5.7", "tiktoken": "^1.0.22", "tinypool": "^2.1.0", "web-tree-sitter": "^0.26.3", diff --git a/src/core/git/gitHubArchive.ts b/src/core/git/gitHubArchive.ts index fdcb711bf..e5e96fa57 100644 --- a/src/core/git/gitHubArchive.ts +++ b/src/core/git/gitHubArchive.ts @@ -1,9 +1,7 @@ -import { createWriteStream } from 'node:fs'; -import * as fs from 'node:fs/promises'; -import * as path from 'node:path'; import { Readable, Transform } from 'node:stream'; import { pipeline } from 'node:stream/promises'; -import { unzip } from 'fflate'; +import * as zlib from 'node:zlib'; +import { extract as tarExtract } from 'tar'; import { RepomixError } from '../../shared/errorHandle.js'; import { logger } from '../../shared/logger.js'; import { @@ -11,7 +9,6 @@ import { buildGitHubMasterArchiveUrl, buildGitHubTagArchiveUrl, checkGitHubResponse, - getArchiveFilename, } from './gitHubArchiveApi.js'; import type { GitHubRepoInfo } from './gitRemoteParse.js'; @@ -28,27 +25,34 @@ export interface ArchiveDownloadProgress { export type ProgressCallback = (progress: ArchiveDownloadProgress) => void; +export interface ArchiveDownloadDeps { + fetch: typeof globalThis.fetch; + pipeline: typeof pipeline; + Transform: typeof Transform; + tarExtract: typeof tarExtract; + createGunzip: typeof zlib.createGunzip; +} + +const defaultDeps: ArchiveDownloadDeps = { + fetch: globalThis.fetch, + pipeline, + Transform, + tarExtract, + createGunzip: zlib.createGunzip, +}; + /** - * Downloads and extracts a GitHub repository archive + * Downloads and extracts a GitHub repository archive using streaming tar.gz extraction */ export const downloadGitHubArchive = async ( repoInfo: GitHubRepoInfo, targetDirectory: string, options: ArchiveDownloadOptions = {}, onProgress?: ProgressCallback, - deps = { - fetch: globalThis.fetch, - fs, - pipeline, - Transform, - createWriteStream, - }, + deps: ArchiveDownloadDeps = defaultDeps, ): Promise => { const { timeout = 30000, retries = 3 } = options; - // Ensure target directory exists - await deps.fs.mkdir(targetDirectory, { recursive: true }); - let lastError: Error | null = null; // Try downloading with multiple URL formats: main branch, master branch (fallback), then tag format @@ -63,7 +67,7 @@ export const downloadGitHubArchive = async ( try { logger.trace(`Downloading GitHub archive from: ${archiveUrl} (attempt ${attempt}/${retries})`); - await downloadAndExtractArchive(archiveUrl, targetDirectory, repoInfo, timeout, onProgress, deps); + await downloadAndExtractArchive(archiveUrl, targetDirectory, timeout, onProgress, deps); logger.trace('Successfully downloaded and extracted GitHub archive'); return; // Success - exit early @@ -71,12 +75,6 @@ export const downloadGitHubArchive = async ( lastError = error as Error; logger.trace(`Archive download attempt ${attempt} failed:`, lastError.message); - // If it's an extraction error, don't retry - the same archive will fail again - const isExtractionError = lastError instanceof RepomixError && lastError.message.includes('Failed to extract'); - if (isExtractionError) { - throw lastError; - } - // If it's a 404-like error and we have more URLs to try, don't retry this URL const isNotFoundError = lastError instanceof RepomixError && @@ -102,61 +100,22 @@ export const downloadGitHubArchive = async ( }; /** - * Downloads and extracts archive from a single URL + * Downloads and extracts a tar.gz archive from a single URL using streaming pipeline. + * The HTTP response is streamed through gunzip and tar extract directly to disk, + * without writing a temporary archive file. */ const downloadAndExtractArchive = async ( archiveUrl: string, targetDirectory: string, - repoInfo: GitHubRepoInfo, timeout: number, onProgress?: ProgressCallback, - deps = { - fetch: globalThis.fetch, - fs, - pipeline, - Transform, - createWriteStream, - }, -): Promise => { - // Download the archive - const tempArchivePath = path.join(targetDirectory, getArchiveFilename(repoInfo)); - - await downloadFile(archiveUrl, tempArchivePath, timeout, onProgress, deps); - - try { - // Extract the archive - await extractZipArchive(tempArchivePath, targetDirectory, repoInfo, { fs: deps.fs }); - } finally { - // Clean up the downloaded archive file - try { - await deps.fs.unlink(tempArchivePath); - } catch (error) { - logger.trace('Failed to cleanup archive file:', (error as Error).message); - } - } -}; - -/** - * Downloads a file from URL with progress tracking - */ -const downloadFile = async ( - url: string, - filePath: string, - timeout: number, - onProgress?: ProgressCallback, - deps = { - fetch: globalThis.fetch, - fs, - pipeline, - Transform, - createWriteStream, - }, + deps: ArchiveDownloadDeps = defaultDeps, ): Promise => { const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), timeout); try { - const response = await deps.fetch(url, { + const response = await deps.fetch(archiveUrl, { signal: controller.signal, }); @@ -171,7 +130,6 @@ const downloadFile = async ( let downloaded = 0; let lastProgressUpdate = 0; - // Use Readable.fromWeb for better stream handling const nodeStream = Readable.fromWeb(response.body); // Transform stream for progress tracking @@ -193,7 +151,6 @@ const downloadFile = async ( callback(null, chunk); }, flush(callback) { - // Send final progress update if (onProgress) { onProgress({ downloaded, @@ -205,137 +162,25 @@ const downloadFile = async ( }, }); - // Write to file - const writeStream = deps.createWriteStream(filePath); - await deps.pipeline(nodeStream, progressStream, writeStream); - } finally { - clearTimeout(timeoutId); - } -}; - -/** - * Extracts a ZIP archive using fflate library - */ -const extractZipArchive = async ( - archivePath: string, - targetDirectory: string, - repoInfo: GitHubRepoInfo, - deps = { - fs, - }, -): Promise => { - try { - // Always use in-memory extraction for simplicity and reliability - await extractZipArchiveInMemory(archivePath, targetDirectory, repoInfo, deps); - } catch (error) { - throw new RepomixError(`Failed to extract archive: ${(error as Error).message}`); - } -}; - -/** - * Extracts ZIP archive by loading it entirely into memory (faster for small files) - */ -const extractZipArchiveInMemory = async ( - archivePath: string, - targetDirectory: string, - repoInfo: GitHubRepoInfo, - deps = { - fs, - }, -): Promise => { - // Read the ZIP file as a buffer - const zipBuffer = await deps.fs.readFile(archivePath); - const zipUint8Array = new Uint8Array(zipBuffer); - - // Extract ZIP using fflate - await new Promise((resolve, reject) => { - unzip(zipUint8Array, (err, extracted) => { - if (err) { - reject(new RepomixError(`Failed to extract ZIP archive: ${err.message}`)); - return; - } - - // Process extracted files concurrently in the callback - processExtractedFiles(extracted, targetDirectory, repoInfo, deps).then(resolve).catch(reject); + // Stream: HTTP response -> progress tracking -> gunzip -> tar extract to disk + // strip: 1 removes the top-level "repo-branch/" directory from archive paths + const extractStream = deps.tarExtract({ + cwd: targetDirectory, + strip: 1, }); - }); -}; + const gunzipStream = deps.createGunzip(); -/** - * Process extracted files sequentially to avoid EMFILE errors - */ -const processExtractedFiles = async ( - extracted: Record, - targetDirectory: string, - repoInfo: GitHubRepoInfo, - deps = { - fs, - }, -): Promise => { - const repoPrefix = `${repoInfo.repo}-`; - const createdDirs = new Set(); - - // Process files sequentially to avoid EMFILE errors completely - for (const [filePath, fileData] of Object.entries(extracted)) { - // GitHub archives have a top-level directory like "repo-branch/" - // We need to remove this prefix from the file paths - let relativePath = filePath; - - // Find and remove the repo prefix - const pathParts = filePath.split('/'); - if (pathParts.length > 0 && pathParts[0].startsWith(repoPrefix)) { - // Remove the first directory (repo-branch/) - relativePath = pathParts.slice(1).join('/'); - } - - // Skip empty paths (root directory) - if (!relativePath) { - continue; - } - - // Sanitize relativePath to prevent path traversal attacks - const sanitized = path.normalize(relativePath).replace(/^(\.\.([/\\]|$))+/, ''); - - // Reject absolute paths outright - if (path.isAbsolute(sanitized)) { - logger.trace(`Absolute path detected in archive, skipping: ${relativePath}`); - continue; - } - - const targetPath = path.resolve(targetDirectory, sanitized); - if (!targetPath.startsWith(path.resolve(targetDirectory))) { - logger.trace(`Unsafe path detected in archive, skipping: ${relativePath}`); - continue; - } - - // Check if this entry is a directory (ends with /) or empty file data indicates directory - const isDirectory = filePath.endsWith('/') || (fileData.length === 0 && relativePath.endsWith('/')); - - if (isDirectory) { - // Create directory immediately - if (!createdDirs.has(targetPath)) { - logger.trace(`Creating directory: ${targetPath}`); - await deps.fs.mkdir(targetPath, { recursive: true }); - createdDirs.add(targetPath); - } - } else { - // Create parent directory if needed and write file - const parentDir = path.dirname(targetPath); - if (!createdDirs.has(parentDir)) { - logger.trace(`Creating parent directory for file: ${parentDir}`); - await deps.fs.mkdir(parentDir, { recursive: true }); - createdDirs.add(parentDir); - } - - // Write file sequentially - logger.trace(`Writing file: ${targetPath}`); - try { - await deps.fs.writeFile(targetPath, fileData); - } catch (fileError) { - logger.trace(`Failed to write file ${targetPath}: ${(fileError as Error).message}`); - throw fileError; - } + try { + await deps.pipeline(nodeStream, progressStream, gunzipStream, extractStream); + } finally { + // Explicitly destroy streams to release handles. + // Bun's pipeline() may not fully clean up, causing subsequent worker_threads to hang. + nodeStream.destroy(); + progressStream.destroy(); + gunzipStream.destroy(); } + } finally { + clearTimeout(timeoutId); } }; diff --git a/src/core/git/gitHubArchiveApi.ts b/src/core/git/gitHubArchiveApi.ts index 5627b642f..e0a0dd932 100644 --- a/src/core/git/gitHubArchiveApi.ts +++ b/src/core/git/gitHubArchiveApi.ts @@ -3,9 +3,9 @@ import type { GitHubRepoInfo } from './gitRemoteParse.js'; /** * Constructs GitHub archive download URL - * Format: https://github.com/owner/repo/archive/refs/heads/branch.zip - * For tags: https://github.com/owner/repo/archive/refs/tags/tag.zip - * For commits: https://github.com/owner/repo/archive/commit.zip + * Format: https://github.com/owner/repo/archive/refs/heads/branch.tar.gz + * For tags: https://github.com/owner/repo/archive/refs/tags/tag.tar.gz + * For commits: https://github.com/owner/repo/archive/commit.tar.gz */ export const buildGitHubArchiveUrl = (repoInfo: GitHubRepoInfo): string => { const { owner, repo, ref } = repoInfo; @@ -13,18 +13,18 @@ export const buildGitHubArchiveUrl = (repoInfo: GitHubRepoInfo): string => { if (!ref) { // Default to HEAD (repository's default branch) - return `${baseUrl}/HEAD.zip`; + return `${baseUrl}/HEAD.tar.gz`; } // Check if ref looks like a commit SHA (40 hex chars or shorter) const isCommitSha = /^[0-9a-f]{4,40}$/i.test(ref); if (isCommitSha) { - return `${baseUrl}/${encodeURIComponent(ref)}.zip`; + return `${baseUrl}/${encodeURIComponent(ref)}.tar.gz`; } // For branches and tags, we need to determine the type // Default to branch format, will fallback to tag if needed - return `${baseUrl}/refs/heads/${encodeURIComponent(ref)}.zip`; + return `${baseUrl}/refs/heads/${encodeURIComponent(ref)}.tar.gz`; }; /** @@ -36,7 +36,7 @@ export const buildGitHubMasterArchiveUrl = (repoInfo: GitHubRepoInfo): string | return null; // Only applicable when no ref is specified } - return `https://github.com/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/archive/refs/heads/master.zip`; + return `https://github.com/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/archive/refs/heads/master.tar.gz`; }; /** @@ -48,21 +48,7 @@ export const buildGitHubTagArchiveUrl = (repoInfo: GitHubRepoInfo): string | nul return null; // Not applicable for commits or no ref } - return `https://github.com/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/archive/refs/tags/${encodeURIComponent(ref)}.zip`; -}; - -/** - * Gets the expected archive filename from GitHub - * Format: repo-branch.zip or repo-sha.zip - */ -export const getArchiveFilename = (repoInfo: GitHubRepoInfo): string => { - const { repo, ref } = repoInfo; - const refPart = ref || 'HEAD'; - - // GitHub uses the last part of the ref for the filename - const refName = refPart.includes('/') ? refPart.split('/').pop() : refPart; - - return `${repo}-${refName}.zip`; + return `https://github.com/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/archive/refs/tags/${encodeURIComponent(ref)}.tar.gz`; }; /** diff --git a/tests/core/git/gitHubArchive.test.ts b/tests/core/git/gitHubArchive.test.ts index e07036e04..846404b0b 100644 --- a/tests/core/git/gitHubArchive.test.ts +++ b/tests/core/git/gitHubArchive.test.ts @@ -1,8 +1,7 @@ -import type { createWriteStream, WriteStream } from 'node:fs'; -import type * as fsPromises from 'node:fs/promises'; -import * as path from 'node:path'; -import { Transform } from 'node:stream'; +import { Transform, Writable } from 'node:stream'; import type { pipeline as pipelineType } from 'node:stream/promises'; +import type * as zlib from 'node:zlib'; +import type { extract as tarExtractType } from 'tar'; import { beforeEach, describe, expect, test, vi } from 'vitest'; import { type ArchiveDownloadOptions, @@ -15,74 +14,52 @@ import { RepomixError } from '../../../src/shared/errorHandle.js'; // Mock modules vi.mock('../../../src/shared/logger'); -vi.mock('fflate', () => ({ - unzip: vi.fn(), -})); // Type for the deps parameter of downloadGitHubArchive interface MockDeps { fetch: typeof globalThis.fetch; - fs: typeof fsPromises; pipeline: typeof pipelineType; Transform: typeof Transform; - createWriteStream: typeof createWriteStream; + tarExtract: typeof tarExtractType; + createGunzip: typeof zlib.createGunzip; } -// Simple ZIP test data -const mockZipData = new Uint8Array([0x50, 0x4b, 0x03, 0x04]); // Simple ZIP header +// Simple test data +const mockStreamData = new Uint8Array([0x1f, 0x8b, 0x08, 0x00]); // gzip magic bytes describe('gitHubArchive', () => { - // Define typed mock functions - const mockFs = { - mkdir: vi.fn(), - readFile: vi.fn(), - writeFile: vi.fn(), - unlink: vi.fn(), - }; - let mockFetch: ReturnType>; let mockPipeline: ReturnType>; - let mockTransformConstructor: typeof Transform; - let mockCreateWriteStream: ReturnType>; - let mockUnzip: ReturnType; + let mockTarExtract: ReturnType>; + let mockCreateGunzip: ReturnType>; let mockDeps: MockDeps; - beforeEach(async () => { + beforeEach(() => { vi.clearAllMocks(); mockFetch = vi.fn(); - mockPipeline = vi.fn(); - mockTransformConstructor = Transform; - mockCreateWriteStream = vi.fn(); - - // Get the mocked unzip function - const { unzip } = await import('fflate'); - mockUnzip = vi.mocked(unzip); - - // Reset fs mocks - for (const mock of Object.values(mockFs)) { - mock.mockReset(); - } - - // Setup default successful behaviors - mockFs.mkdir.mockResolvedValue(undefined); - mockFs.unlink.mockResolvedValue(undefined); - mockFs.readFile.mockResolvedValue(Buffer.from(mockZipData)); - mockFs.writeFile.mockResolvedValue(undefined); - mockPipeline.mockResolvedValue(undefined); - mockCreateWriteStream.mockReturnValue({ - write: vi.fn(), - end: vi.fn(), - } as unknown as WriteStream); - - // Create mockDeps with type casting for mock objects - // Using 'as unknown as Type' pattern is idiomatic for test mocks + mockPipeline = vi.fn().mockResolvedValue(undefined); + mockTarExtract = vi.fn().mockReturnValue( + new Writable({ + write(_chunk, _enc, cb) { + cb(); + }, + }) as unknown as ReturnType, + ); + mockCreateGunzip = vi.fn().mockReturnValue( + new Transform({ + transform(chunk, _enc, cb) { + cb(null, chunk); + }, + }) as unknown as ReturnType, + ); + mockDeps = { fetch: mockFetch, - fs: mockFs as unknown as typeof fsPromises, pipeline: mockPipeline as unknown as typeof pipelineType, - Transform: mockTransformConstructor, - createWriteStream: mockCreateWriteStream, + Transform, + tarExtract: mockTarExtract as unknown as typeof tarExtractType, + createGunzip: mockCreateGunzip as unknown as typeof zlib.createGunzip, }; }); @@ -99,134 +76,76 @@ describe('gitHubArchive', () => { retries: 3, }; - test('should successfully download and extract archive', async () => { - // Mock successful response with stream - const mockStream = new ReadableStream({ - start(controller) { - controller.enqueue(mockZipData); - controller.close(); - }, - }); - - mockFetch.mockResolvedValue({ + const createMockResponse = (overrides: Partial = {}): Response => { + return { ok: true, status: 200, - headers: new Map([['content-length', mockZipData.length.toString()]]), - body: mockStream, - } as unknown as Response); - - // Mock unzip to extract files - mockUnzip.mockImplementation((_data, callback) => { - const extracted = { - 'repomix-main/': new Uint8Array(0), // Directory - 'repomix-main/test.txt': new Uint8Array([0x68, 0x65, 0x6c, 0x6c, 0x6f]), // "hello" - }; - callback(null, extracted); - }); + headers: new Map([['content-length', mockStreamData.length.toString()]]), + body: new ReadableStream({ + start(controller) { + controller.enqueue(mockStreamData); + controller.close(); + }, + }), + ...overrides, + } as unknown as Response; + }; - await downloadGitHubArchive(mockRepoInfo, mockTargetDirectory, mockOptions, undefined, mockDeps); + test('should successfully download and extract archive', async () => { + mockFetch.mockResolvedValue(createMockResponse()); - // Verify directory creation - expect(mockFs.mkdir).toHaveBeenCalledWith(mockTargetDirectory, { recursive: true }); + await downloadGitHubArchive(mockRepoInfo, mockTargetDirectory, mockOptions, undefined, mockDeps); - // Verify fetch was called + // Verify fetch was called with tar.gz URL expect(mockFetch).toHaveBeenCalledWith( - 'https://github.com/yamadashy/repomix/archive/refs/heads/main.zip', + 'https://github.com/yamadashy/repomix/archive/refs/heads/main.tar.gz', expect.objectContaining({ signal: expect.any(AbortSignal), }), ); - // Verify file operations - expect(mockFs.writeFile).toHaveBeenCalledWith( - path.resolve(mockTargetDirectory, 'test.txt'), - expect.any(Uint8Array), - ); + // Verify tar extract was called with correct options + expect(mockTarExtract).toHaveBeenCalledWith({ + cwd: mockTargetDirectory, + strip: 1, + }); - // Verify cleanup - expect(mockFs.unlink).toHaveBeenCalledWith(path.join(mockTargetDirectory, 'repomix-main.zip')); + // Verify streaming pipeline was used + expect(mockPipeline).toHaveBeenCalledTimes(1); }); test('should handle progress callback', async () => { const mockProgressCallback: ProgressCallback = vi.fn(); - - const mockStream = new ReadableStream({ - start(controller) { - controller.enqueue(mockZipData); - controller.close(); - }, - }); - - mockFetch.mockResolvedValue({ - ok: true, - status: 200, - headers: new Map([['content-length', mockZipData.length.toString()]]), - body: mockStream, - } as unknown as Response); - - mockUnzip.mockImplementation((_data, callback) => { - callback(null, {}); - }); + mockFetch.mockResolvedValue(createMockResponse()); await downloadGitHubArchive(mockRepoInfo, mockTargetDirectory, mockOptions, mockProgressCallback, mockDeps); - // Progress callback is called via Transform stream, which is handled internally - // Just verify the download completed successfully expect(mockFetch).toHaveBeenCalled(); - expect(mockUnzip).toHaveBeenCalled(); + expect(mockPipeline).toHaveBeenCalled(); }); - test('should retry on failure', async () => { - // First two attempts fail, third succeeds + test('should retry on network failure', async () => { mockFetch .mockRejectedValueOnce(new Error('Network error')) .mockRejectedValueOnce(new Error('Network error')) - .mockResolvedValueOnce({ - ok: true, - status: 200, - headers: new Map([['content-length', mockZipData.length.toString()]]), - body: new ReadableStream({ - start(controller) { - controller.enqueue(mockZipData); - controller.close(); - }, - }), - } as unknown as Response); - - mockUnzip.mockImplementation((_data, callback) => { - callback(null, {}); - }); + .mockResolvedValueOnce(createMockResponse()); - // Use fewer retries to speed up test await downloadGitHubArchive(mockRepoInfo, mockTargetDirectory, { retries: 2 }, undefined, mockDeps); expect(mockFetch).toHaveBeenCalledTimes(3); }); test('should try fallback URLs on 404', async () => { - // Mock 404 for main branch, success for master branch mockFetch - .mockResolvedValueOnce({ - ok: false, - status: 404, - headers: new Map(), - body: null, - } as unknown as Response) - .mockResolvedValueOnce({ - ok: true, - status: 200, - headers: new Map([['content-length', mockZipData.length.toString()]]), - body: new ReadableStream({ - start(controller) { - controller.enqueue(mockZipData); - controller.close(); - }, - }), - } as unknown as Response); - - mockUnzip.mockImplementation((_data, callback) => { - callback(null, {}); - }); + .mockResolvedValueOnce( + createMockResponse({ + ok: false, + status: 404, + headers: new Map(), + body: null, + } as unknown as Partial), + ) + .mockResolvedValueOnce(createMockResponse()); const repoInfoNoRef = { owner: 'yamadashy', repo: 'repomix' }; @@ -234,11 +153,11 @@ describe('gitHubArchive', () => { // Should try HEAD first, then master branch expect(mockFetch).toHaveBeenCalledWith( - 'https://github.com/yamadashy/repomix/archive/HEAD.zip', + 'https://github.com/yamadashy/repomix/archive/HEAD.tar.gz', expect.any(Object), ); expect(mockFetch).toHaveBeenCalledWith( - 'https://github.com/yamadashy/repomix/archive/refs/heads/master.zip', + 'https://github.com/yamadashy/repomix/archive/refs/heads/master.tar.gz', expect.any(Object), ); }); @@ -250,161 +169,21 @@ describe('gitHubArchive', () => { downloadGitHubArchive(mockRepoInfo, mockTargetDirectory, { retries: 2 }, undefined, mockDeps), ).rejects.toThrow(RepomixError); - // Multiple URLs are tried even with ref: main, fallback, tag // 2 retries × 2 URLs (main + tag for "main" ref) = 4 total attempts expect(mockFetch).toHaveBeenCalledTimes(4); }); - test('should handle ZIP extraction error', async () => { - mockFetch.mockResolvedValue({ - ok: true, - status: 200, - headers: new Map([['content-length', mockZipData.length.toString()]]), - body: new ReadableStream({ - start(controller) { - controller.enqueue(mockZipData); - controller.close(); - }, - }), - } as unknown as Response); - - // Mock unzip to fail - mockUnzip.mockImplementation((_data, callback) => { - callback(new Error('Invalid ZIP file')); - }); + test('should handle extraction error', async () => { + mockFetch.mockResolvedValue(createMockResponse()); + mockPipeline.mockRejectedValue(new Error('tar extraction failed')); await expect( downloadGitHubArchive(mockRepoInfo, mockTargetDirectory, { retries: 1 }, undefined, mockDeps), ).rejects.toThrow(RepomixError); }); - test('should not retry on extraction error', async () => { - mockFetch.mockResolvedValue({ - ok: true, - status: 200, - headers: new Map([['content-length', mockZipData.length.toString()]]), - body: new ReadableStream({ - start(controller) { - controller.enqueue(mockZipData); - controller.close(); - }, - }), - } as unknown as Response); - - // Mock unzip to fail - mockUnzip.mockImplementation((_data, callback) => { - callback(new Error('Invalid ZIP file')); - }); - - await expect( - downloadGitHubArchive(mockRepoInfo, mockTargetDirectory, { retries: 3 }, undefined, mockDeps), - ).rejects.toThrow(RepomixError); - - // Should only fetch once - extraction errors should not trigger retries - expect(mockFetch).toHaveBeenCalledTimes(1); - }); - - test('should handle path traversal attack', async () => { - mockFetch.mockResolvedValue({ - ok: true, - status: 200, - headers: new Map([['content-length', mockZipData.length.toString()]]), - body: new ReadableStream({ - start(controller) { - controller.enqueue(mockZipData); - controller.close(); - }, - }), - } as unknown as Response); - - // Mock unzip with dangerous paths - mockUnzip.mockImplementation((_data, callback) => { - const extracted = { - 'repomix-main/../../../etc/passwd': new Uint8Array([0x65, 0x76, 0x69, 0x6c]), // "evil" - 'repomix-main/safe.txt': new Uint8Array([0x73, 0x61, 0x66, 0x65]), // "safe" - }; - callback(null, extracted); - }); - - await downloadGitHubArchive(mockRepoInfo, mockTargetDirectory, mockOptions, undefined, mockDeps); - - // Should write both files - the path normalization doesn't completely prevent this case - expect(mockFs.writeFile).toHaveBeenCalledWith( - path.resolve(mockTargetDirectory, 'safe.txt'), - expect.any(Uint8Array), - ); - - // Verify that both files are written (one was sanitized to remove path traversal) - expect(mockFs.writeFile).toHaveBeenCalledTimes(2); - }); - - test('should handle absolute paths in ZIP', async () => { - mockFetch.mockResolvedValue({ - ok: true, - status: 200, - headers: new Map([['content-length', mockZipData.length.toString()]]), - body: new ReadableStream({ - start(controller) { - controller.enqueue(mockZipData); - controller.close(); - }, - }), - } as unknown as Response); - - // Mock unzip with absolute path - mockUnzip.mockImplementation((_data, callback) => { - const extracted = { - '/etc/passwd': new Uint8Array([0x65, 0x76, 0x69, 0x6c]), // "evil" - 'repomix-main/safe.txt': new Uint8Array([0x73, 0x61, 0x66, 0x65]), // "safe" - }; - callback(null, extracted); - }); - - await downloadGitHubArchive(mockRepoInfo, mockTargetDirectory, mockOptions, undefined, mockDeps); - - // Should only write safe file, not the absolute path - expect(mockFs.writeFile).toHaveBeenCalledWith( - path.resolve(mockTargetDirectory, 'safe.txt'), - expect.any(Uint8Array), - ); - - // Should not write the absolute path file - expect(mockFs.writeFile).not.toHaveBeenCalledWith('/etc/passwd', expect.any(Uint8Array)); - }); - - test('should cleanup archive file even on extraction failure', async () => { - mockFetch.mockResolvedValue({ - ok: true, - status: 200, - headers: new Map([['content-length', mockZipData.length.toString()]]), - body: new ReadableStream({ - start(controller) { - controller.enqueue(mockZipData); - controller.close(); - }, - }), - } as unknown as Response); - - // Mock unzip to fail - mockUnzip.mockImplementation((_data, callback) => { - callback(new Error('Extraction failed')); - }); - - await expect( - downloadGitHubArchive(mockRepoInfo, mockTargetDirectory, { retries: 1 }, undefined, mockDeps), - ).rejects.toThrow(); - - // Should still attempt cleanup - expect(mockFs.unlink).toHaveBeenCalledWith(path.join(mockTargetDirectory, 'repomix-main.zip')); - }); - test('should handle missing response body', async () => { - mockFetch.mockResolvedValue({ - ok: true, - status: 200, - headers: new Map(), - body: null, - } as unknown as Response); + mockFetch.mockResolvedValue(createMockResponse({ body: null } as unknown as Partial)); await expect( downloadGitHubArchive(mockRepoInfo, mockTargetDirectory, { retries: 1 }, undefined, mockDeps), @@ -412,27 +191,22 @@ describe('gitHubArchive', () => { }); test('should handle timeout', async () => { - // Mock a fetch that takes too long mockFetch.mockImplementation( - () => - new Promise((resolve) => { - setTimeout(() => { - resolve({ - ok: true, - status: 200, - headers: new Map(), - body: new ReadableStream({ - start(controller) { - controller.enqueue(mockZipData); - controller.close(); - }, - }), - } as unknown as Response); - }, 100); // Resolve after 100ms, but timeout is 50ms + (_url: string | URL | Request, init?: RequestInit) => + new Promise((resolve, reject) => { + const timer = setTimeout(() => { + resolve(createMockResponse()); + }, 100); + + // Respect AbortSignal so timeout actually cancels the fetch + init?.signal?.addEventListener('abort', () => { + clearTimeout(timer); + reject(new DOMException('The operation was aborted', 'AbortError')); + }); }), ); - const shortTimeout = 50; // 50ms timeout for faster test + const shortTimeout = 50; await expect( downloadGitHubArchive( diff --git a/tests/core/git/gitHubArchiveApi.test.ts b/tests/core/git/gitHubArchiveApi.test.ts index fdfd8cff8..637e96ab1 100644 --- a/tests/core/git/gitHubArchiveApi.test.ts +++ b/tests/core/git/gitHubArchiveApi.test.ts @@ -4,7 +4,6 @@ import { buildGitHubMasterArchiveUrl, buildGitHubTagArchiveUrl, checkGitHubResponse, - getArchiveFilename, } from '../../../src/core/git/gitHubArchiveApi.js'; import { parseGitHubRepoInfo } from '../../../src/core/git/gitRemoteParse.js'; import { RepomixError } from '../../../src/shared/errorHandle.js'; @@ -14,25 +13,25 @@ describe('GitHub Archive API', () => { test('should build URL for default branch (HEAD)', () => { const repoInfo = { owner: 'user', repo: 'repo' }; const url = buildGitHubArchiveUrl(repoInfo); - expect(url).toBe('https://github.com/user/repo/archive/HEAD.zip'); + expect(url).toBe('https://github.com/user/repo/archive/HEAD.tar.gz'); }); test('should build URL for specific branch', () => { const repoInfo = { owner: 'user', repo: 'repo', ref: 'develop' }; const url = buildGitHubArchiveUrl(repoInfo); - expect(url).toBe('https://github.com/user/repo/archive/refs/heads/develop.zip'); + expect(url).toBe('https://github.com/user/repo/archive/refs/heads/develop.tar.gz'); }); test('should build URL for commit SHA', () => { const repoInfo = { owner: 'user', repo: 'repo', ref: 'abc123def456' }; const url = buildGitHubArchiveUrl(repoInfo); - expect(url).toBe('https://github.com/user/repo/archive/abc123def456.zip'); + expect(url).toBe('https://github.com/user/repo/archive/abc123def456.tar.gz'); }); test('should build URL for full commit SHA', () => { const repoInfo = { owner: 'user', repo: 'repo', ref: 'abc123def456789012345678901234567890abcd' }; const url = buildGitHubArchiveUrl(repoInfo); - expect(url).toBe('https://github.com/user/repo/archive/abc123def456789012345678901234567890abcd.zip'); + expect(url).toBe('https://github.com/user/repo/archive/abc123def456789012345678901234567890abcd.tar.gz'); }); }); @@ -40,7 +39,7 @@ describe('GitHub Archive API', () => { test('should build URL for master branch fallback', () => { const repoInfo = { owner: 'user', repo: 'repo' }; const url = buildGitHubMasterArchiveUrl(repoInfo); - expect(url).toBe('https://github.com/user/repo/archive/refs/heads/master.zip'); + expect(url).toBe('https://github.com/user/repo/archive/refs/heads/master.tar.gz'); }); test('should return null when ref is specified', () => { @@ -54,7 +53,7 @@ describe('GitHub Archive API', () => { test('should build URL for tag', () => { const repoInfo = { owner: 'user', repo: 'repo', ref: 'v1.0.0' }; const url = buildGitHubTagArchiveUrl(repoInfo); - expect(url).toBe('https://github.com/user/repo/archive/refs/tags/v1.0.0.zip'); + expect(url).toBe('https://github.com/user/repo/archive/refs/tags/v1.0.0.tar.gz'); }); test('should return null for commit SHA', () => { @@ -70,32 +69,6 @@ describe('GitHub Archive API', () => { }); }); - describe('getArchiveFilename', () => { - test('should generate filename for default branch (HEAD)', () => { - const repoInfo = { owner: 'user', repo: 'myrepo' }; - const filename = getArchiveFilename(repoInfo); - expect(filename).toBe('myrepo-HEAD.zip'); - }); - - test('should generate filename for specific branch', () => { - const repoInfo = { owner: 'user', repo: 'myrepo', ref: 'develop' }; - const filename = getArchiveFilename(repoInfo); - expect(filename).toBe('myrepo-develop.zip'); - }); - - test('should generate filename for tag with slash', () => { - const repoInfo = { owner: 'user', repo: 'myrepo', ref: 'release/v1.0' }; - const filename = getArchiveFilename(repoInfo); - expect(filename).toBe('myrepo-v1.0.zip'); - }); - - test('should generate filename for commit SHA', () => { - const repoInfo = { owner: 'user', repo: 'myrepo', ref: 'abc123' }; - const filename = getArchiveFilename(repoInfo); - expect(filename).toBe('myrepo-abc123.zip'); - }); - }); - describe('checkGitHubResponse', () => { test('should not throw for successful response', () => { const mockResponse = new Response('', { status: 200 });