diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a55e00ae3..8f72d9377 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -86,9 +86,16 @@ jobs: with: node-version-file: .tool-versions cache: npm + - name: Build and link local repomix + run: | + npm ci + npm run build + npm link - name: Install website server dependencies working-directory: website/server - run: npm ci + run: | + npm ci + npm link repomix - name: Lint website server working-directory: website/server run: npm run lint diff --git a/src/cli/actions/defaultAction.ts b/src/cli/actions/defaultAction.ts index d46789bad..0fe69e1fd 100644 --- a/src/cli/actions/defaultAction.ts +++ b/src/cli/actions/defaultAction.ts @@ -107,7 +107,7 @@ export const runDefaultAction = async ( // Create worker task runner const taskRunner = initTaskRunner({ numOfTasks: 1, - workerPath: new URL('./workers/defaultActionWorker.js', import.meta.url).href, + workerType: 'defaultAction', runtime: 'child_process', }); diff --git a/src/cli/actions/workers/defaultActionWorker.ts b/src/cli/actions/workers/defaultActionWorker.ts index f1d2b9ec0..323ebaa84 100644 --- a/src/cli/actions/workers/defaultActionWorker.ts +++ b/src/cli/actions/workers/defaultActionWorker.ts @@ -43,19 +43,30 @@ async function defaultActionWorker( }; } + // Validate task structure + if (!task || typeof task !== 'object') { + throw new Error(`Invalid task: expected object, got ${typeof task}`); + } + // At this point, task is guaranteed to be DefaultActionTask const { directories, cwd, config, cliOptions, stdinFilePaths } = task; + if (!directories || !Array.isArray(directories)) { + throw new Error('Invalid task: directories must be an array'); + } + + // Provide defaults for bundled environments where cliOptions might be undefined + const safeCliOptions: CliOptions = cliOptions ?? 
{}; + logger.trace('Worker: Using pre-loaded config:', config); - // Initialize spinner in worker - const spinner = new Spinner('Initializing...', cliOptions); + const spinner = new Spinner('Initializing...', safeCliOptions); spinner.start(); let packResult: PackResult; try { - const { skillName, skillDir, skillProjectName, skillSourceUrl } = cliOptions; + const { skillName, skillDir, skillProjectName, skillSourceUrl } = safeCliOptions; const packOptions = { skillName, skillDir, skillProjectName, skillSourceUrl }; if (stdinFilePaths) { @@ -105,7 +116,7 @@ async function defaultActionWorker( export default defaultActionWorker; // Export cleanup function for Tinypool teardown -export const onWorkerTermination = async () => { +export const onWorkerTermination = async (): Promise<void> => { // Any cleanup needed when worker terminates // Currently no specific cleanup required for defaultAction worker }; diff --git a/src/cli/cliSpinner.ts b/src/cli/cliSpinner.ts index c40878cb1..a499965d3 100644 --- a/src/cli/cliSpinner.ts +++ b/src/cli/cliSpinner.ts @@ -12,10 +12,11 @@ export class Spinner { private interval: ReturnType<typeof setInterval> | null = null; private readonly isQuiet: boolean; - constructor(message: string, cliOptions: CliOptions) { + constructor(message: string, cliOptions?: CliOptions) { this.message = message; // If the user has specified the verbose flag, don't show the spinner - this.isQuiet = cliOptions.quiet || cliOptions.verbose || cliOptions.stdout || false; + // Use optional chaining to handle undefined cliOptions (e.g., in bundled worker environments) + this.isQuiet = cliOptions?.quiet || cliOptions?.verbose || cliOptions?.stdout || false; } start(): void { diff --git a/src/core/file/fileCollect.ts b/src/core/file/fileCollect.ts index e5f1c2894..07191634f 100644 --- a/src/core/file/fileCollect.ts +++ b/src/core/file/fileCollect.ts @@ -25,7 +25,7 @@ export const collectFiles = async ( ): Promise<RawFile[]> => { const taskRunner = deps.initTaskRunner({ numOfTasks: filePaths.length, -
workerPath: new URL('./workers/fileCollectWorker.js', import.meta.url).href, + workerType: 'fileCollect', runtime: 'worker_threads', }); const tasks = filePaths.map( diff --git a/src/core/file/fileProcess.ts b/src/core/file/fileProcess.ts index 85e7016d7..be9876165 100644 --- a/src/core/file/fileProcess.ts +++ b/src/core/file/fileProcess.ts @@ -23,7 +23,7 @@ export const processFiles = async ( ): Promise => { const taskRunner = deps.initTaskRunner({ numOfTasks: rawFiles.length, - workerPath: new URL('./workers/fileProcessWorker.js', import.meta.url).href, + workerType: 'fileProcess', // High memory usage and leak risk runtime: 'worker_threads', }); diff --git a/src/core/file/workers/fileCollectWorker.ts b/src/core/file/workers/fileCollectWorker.ts index 875ef337c..7da9d0ab4 100644 --- a/src/core/file/workers/fileCollectWorker.ts +++ b/src/core/file/workers/fileCollectWorker.ts @@ -51,6 +51,6 @@ export default async ({ filePath, rootDir, maxFileSize }: FileCollectTask): Prom }; // Export cleanup function for Tinypool teardown (no cleanup needed for this worker) -export const onWorkerTermination = () => { +export const onWorkerTermination = async (): Promise => { // No cleanup needed for file collection worker }; diff --git a/src/core/file/workers/fileProcessWorker.ts b/src/core/file/workers/fileProcessWorker.ts index 3e51da52c..8832533a9 100644 --- a/src/core/file/workers/fileProcessWorker.ts +++ b/src/core/file/workers/fileProcessWorker.ts @@ -22,6 +22,6 @@ export default async ({ rawFile, config }: FileProcessTask): Promise { +export const onWorkerTermination = async (): Promise => { await cleanupLanguageParser(); }; diff --git a/src/core/metrics/calculateMetrics.ts b/src/core/metrics/calculateMetrics.ts index 56679f7e0..6ec76a3e4 100644 --- a/src/core/metrics/calculateMetrics.ts +++ b/src/core/metrics/calculateMetrics.ts @@ -43,7 +43,7 @@ export const calculateMetrics = async ( deps.taskRunner ?? 
initTaskRunner({ numOfTasks: processedFiles.length, - workerPath: new URL('./workers/calculateMetricsWorker.js', import.meta.url).href, + workerType: 'calculateMetrics', runtime: 'worker_threads', }); diff --git a/src/core/metrics/workers/calculateMetricsWorker.ts b/src/core/metrics/workers/calculateMetricsWorker.ts index 34fc0ee48..241af02e0 100644 --- a/src/core/metrics/workers/calculateMetricsWorker.ts +++ b/src/core/metrics/workers/calculateMetricsWorker.ts @@ -45,6 +45,6 @@ export default async (task: TokenCountTask): Promise => { }; // Export cleanup function for Tinypool teardown -export const onWorkerTermination = () => { +export const onWorkerTermination = async (): Promise => { freeTokenCounters(); }; diff --git a/src/core/security/securityCheck.ts b/src/core/security/securityCheck.ts index d20cf692a..3fffed911 100644 --- a/src/core/security/securityCheck.ts +++ b/src/core/security/securityCheck.ts @@ -57,7 +57,7 @@ export const runSecurityCheck = async ( const taskRunner = deps.initTaskRunner({ numOfTasks: rawFiles.length + gitDiffTasks.length + gitLogTasks.length, - workerPath: new URL('./workers/securityCheckWorker.js', import.meta.url).href, + workerType: 'securityCheck', runtime: 'worker_threads', }); const fileTasks = rawFiles.map( diff --git a/src/core/security/workers/securityCheckWorker.ts b/src/core/security/workers/securityCheckWorker.ts index acc4c6b6d..73e00a31b 100644 --- a/src/core/security/workers/securityCheckWorker.ts +++ b/src/core/security/workers/securityCheckWorker.ts @@ -85,6 +85,6 @@ export const createSecretLintConfig = (): SecretLintCoreConfig => ({ }); // Export cleanup function for Tinypool teardown (no cleanup needed for this worker) -export const onWorkerTermination = () => { +export const onWorkerTermination = async (): Promise => { // No cleanup needed for security check worker }; diff --git a/src/core/treeSitter/loadLanguage.ts b/src/core/treeSitter/loadLanguage.ts index bf672cd49..5e34e0bdd 100644 --- 
a/src/core/treeSitter/loadLanguage.ts +++ b/src/core/treeSitter/loadLanguage.ts @@ -1,9 +1,32 @@ import fs from 'node:fs/promises'; import { createRequire } from 'node:module'; +import path from 'node:path'; import { Language } from 'web-tree-sitter'; const require = createRequire(import.meta.url); +/** + * Custom WASM base path for bundled environments. + * Set via REPOMIX_WASM_DIR environment variable or setWasmBasePath(). + * When set, WASM files are loaded from this directory instead of node_modules. + */ +let customWasmBasePath: string | null = null; + +/** + * Set a custom base path for WASM files. + * Used in bundled environments where WASM files are copied to a custom location. + */ +export function setWasmBasePath(basePath: string): void { + customWasmBasePath = basePath; +} + +/** + * Get the WASM base path from environment variable or custom setting. + */ +function getWasmBasePath(): string | null { + return customWasmBasePath ?? process.env.REPOMIX_WASM_DIR ?? null; +} + export async function loadLanguage(langName: string): Promise<Language> { if (!langName) { throw new Error('Invalid language name'); @@ -19,7 +42,17 @@ export async function loadLanguage(langName: string): Promise<Language> { } async function getWasmPath(langName: string): Promise<string> { - const wasmPath = require.resolve(`@repomix/tree-sitter-wasms/out/tree-sitter-${langName}.wasm`); + const wasmBasePath = getWasmBasePath(); + + let wasmPath: string; + if (wasmBasePath) { + // Use custom WASM path for bundled environments + wasmPath = path.join(wasmBasePath, `tree-sitter-${langName}.wasm`); + } else { + // Use require.resolve for standard node_modules environments + wasmPath = require.resolve(`@repomix/tree-sitter-wasms/out/tree-sitter-${langName}.wasm`); + } + try { await fs.access(wasmPath); return wasmPath; diff --git a/src/index.ts b/src/index.ts index 0c1e5bac5..7a695c5d2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -24,6 +24,7 @@ export { TokenCounter } from './core/metrics/TokenCounter.js'; //
Tree-sitter export { parseFile } from './core/treeSitter/parseFile.js'; +export { setWasmBasePath } from './core/treeSitter/loadLanguage.js'; // --------------------------------------------------------------------------------------------------------------------- // Config @@ -56,3 +57,12 @@ export { runDefaultAction, buildCliConfig } from './cli/actions/defaultAction.js // Remote action export { runRemoteAction } from './cli/actions/remoteAction.js'; + +// --------------------------------------------------------------------------------------------------------------------- +// Worker (for bundled environments) +// --------------------------------------------------------------------------------------------------------------------- +export { + default as unifiedWorkerHandler, + onWorkerTermination as unifiedWorkerTermination, + type WorkerType, +} from './shared/unifiedWorker.js'; diff --git a/src/shared/processConcurrency.ts b/src/shared/processConcurrency.ts index 68691af81..4c973e32f 100644 --- a/src/shared/processConcurrency.ts +++ b/src/shared/processConcurrency.ts @@ -1,15 +1,47 @@ import os from 'node:os'; import { type Options, Tinypool } from 'tinypool'; import { logger } from './logger.js'; +import type { WorkerType } from './unifiedWorker.js'; export type WorkerRuntime = NonNullable; +// Re-export WorkerType for external consumers +export type { WorkerType } from './unifiedWorker.js'; + export interface WorkerOptions { numOfTasks: number; - workerPath: string; + workerType: WorkerType; runtime: WorkerRuntime; } +/** + * Get the worker file path for a given worker type. + * In bundled environments (REPOMIX_WORKER_PATH set), uses the unified worker. + * Otherwise, uses individual worker files. 
+ */ +const getWorkerPath = (workerType: WorkerType): string => { + // Bundled environment: use unified worker path + if (process.env.REPOMIX_WORKER_PATH) { + return process.env.REPOMIX_WORKER_PATH; + } + + // Non-bundled environment: use individual worker files + switch (workerType) { + case 'fileCollect': + return new URL('../core/file/workers/fileCollectWorker.js', import.meta.url).href; + case 'fileProcess': + return new URL('../core/file/workers/fileProcessWorker.js', import.meta.url).href; + case 'securityCheck': + return new URL('../core/security/workers/securityCheckWorker.js', import.meta.url).href; + case 'calculateMetrics': + return new URL('../core/metrics/workers/calculateMetricsWorker.js', import.meta.url).href; + case 'defaultAction': + return new URL('../cli/actions/workers/defaultActionWorker.js', import.meta.url).href; + default: + throw new Error(`Unknown worker type: ${workerType}`); + } +}; + // Worker initialization is expensive, so we prefer fewer threads unless there are many files const TASKS_PER_THREAD = 100; @@ -32,11 +64,14 @@ export const getWorkerThreadCount = (numOfTasks: number): { minThreads: number; }; export const createWorkerPool = (options: WorkerOptions): Tinypool => { - const { numOfTasks, workerPath, runtime = 'child_process' } = options; + const { numOfTasks, workerType, runtime = 'child_process' } = options; const { minThreads, maxThreads } = getWorkerThreadCount(numOfTasks); + // Get worker path - uses unified worker in bundled env, individual files otherwise + const workerPath = getWorkerPath(workerType); + logger.trace( - `Initializing worker pool with min=${minThreads}, max=${maxThreads} threads, runtime=${runtime}. Worker path: ${workerPath}`, + `Initializing worker pool with min=${minThreads}, max=${maxThreads} threads, runtime=${runtime}. 
Worker type: ${workerType}`, ); const startTime = process.hrtime.bigint(); @@ -49,12 +84,16 @@ export const createWorkerPool = (options: WorkerOptions): Tinypool => { idleTimeout: 5000, teardown: 'onWorkerTermination', workerData: { + workerType, logLevel: logger.getLogLevel(), }, // Only add env for child_process workers ...(runtime === 'child_process' && { env: { ...process.env, + // Pass worker type as environment variable for child_process workers + // This is needed because workerData is not directly accessible in child_process runtime + REPOMIX_WORKER_TYPE: workerType, // Pass log level as environment variable for child_process workers REPOMIX_LOG_LEVEL: logger.getLogLevel().toString(), // Ensure color support in child_process workers diff --git a/src/shared/unifiedWorker.ts b/src/shared/unifiedWorker.ts new file mode 100644 index 000000000..1adec3c91 --- /dev/null +++ b/src/shared/unifiedWorker.ts @@ -0,0 +1,189 @@ +/** + * Unified Worker Entry Point + * + * This module serves as a single entry point for all worker types in Repomix. + * It enables full bundling support by allowing the bundled file to spawn workers + * using itself (import.meta.url), eliminating path resolution issues. + * + * When running as a worker, it dynamically imports the appropriate worker handler + * based on the workerType specified in workerData. + */ + +import { workerData } from 'node:worker_threads'; + +// Worker type definitions +export type WorkerType = 'fileCollect' | 'fileProcess' | 'securityCheck' | 'calculateMetrics' | 'defaultAction'; + +// Worker handler type - uses 'any' to accommodate different worker signatures +// biome-ignore lint/suspicious/noExplicitAny: Worker handlers have varying signatures +type WorkerHandler = (task: any) => Promise; +type WorkerCleanup = () => void | Promise; + +// Cache loaded handlers by worker type +const handlerCache = new Map(); + +/** + * Dynamically load the appropriate worker handler based on workerType. 
+ * Uses dynamic imports to avoid loading all worker code when not needed. + * Results are cached for reuse. + */ +const loadWorkerHandler = async ( + workerType: WorkerType, +): Promise<{ handler: WorkerHandler; cleanup?: WorkerCleanup }> => { + // Check cache first + const cached = handlerCache.get(workerType); + if (cached) { + return cached; + } + + let result: { handler: WorkerHandler; cleanup?: WorkerCleanup }; + + switch (workerType) { + case 'fileCollect': { + const module = await import('../core/file/workers/fileCollectWorker.js'); + result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + break; + } + case 'fileProcess': { + const module = await import('../core/file/workers/fileProcessWorker.js'); + result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + break; + } + case 'securityCheck': { + const module = await import('../core/security/workers/securityCheckWorker.js'); + result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + break; + } + case 'calculateMetrics': { + const module = await import('../core/metrics/workers/calculateMetricsWorker.js'); + result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + break; + } + case 'defaultAction': { + const module = await import('../cli/actions/workers/defaultActionWorker.js'); + result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination }; + break; + } + default: + throw new Error(`Unknown worker type: ${workerType}`); + } + + // Cache the result + handlerCache.set(workerType, result); + return result; +}; + +/** + * Infer worker type from task structure. + * This is used in bundled environments where Tinypool may reuse child processes + * across different worker pools. 
+ */ +const inferWorkerTypeFromTask = (task: unknown): WorkerType | null => { + if (!task || typeof task !== 'object') { + return null; + } + + const taskObj = task as Record; + + // defaultAction: has directories, cwd, config, cliOptions + if ('directories' in taskObj && 'cwd' in taskObj && 'config' in taskObj) { + return 'defaultAction'; + } + + // defaultAction ping task + if ('ping' in taskObj) { + return 'defaultAction'; + } + + // fileCollect: has filePath, rootDir, maxFileSize + if ('filePath' in taskObj && 'rootDir' in taskObj && 'maxFileSize' in taskObj) { + return 'fileCollect'; + } + + // fileProcess: has rawFile (nested object) and config + if ('rawFile' in taskObj && 'config' in taskObj) { + return 'fileProcess'; + } + + // calculateMetrics: has content, encoding (must check before securityCheck) + if ('content' in taskObj && 'encoding' in taskObj) { + return 'calculateMetrics'; + } + + // securityCheck: has filePath, content, type + if ('filePath' in taskObj && 'content' in taskObj && 'type' in taskObj) { + return 'securityCheck'; + } + + return null; +}; + +/** + * Get workerType from workerData. + * In Tinypool child_process mode, workerData is an array. + */ +const getWorkerTypeFromWorkerData = (): WorkerType | undefined => { + if (!workerData) { + return undefined; + } + + // Handle array format (Tinypool child_process mode) + if (Array.isArray(workerData)) { + for (const item of workerData) { + if (item && typeof item === 'object' && 'workerType' in item) { + return item.workerType as WorkerType; + } + } + return undefined; + } + + // Handle object format (worker_threads mode) + if (typeof workerData === 'object' && 'workerType' in workerData) { + return (workerData as { workerType?: WorkerType }).workerType; + } + + return undefined; +}; + +/** + * Default export for Tinypool. + * This function is called for each task and delegates to the appropriate handler. 
+ * + * In bundled environments where Tinypool may reuse child processes across different + * worker pools, we use task-based inference to determine the correct handler. + */ +export default async (task: unknown): Promise => { + // Determine worker type: try workerData/env first, then infer from task + let workerType: WorkerType | undefined = + getWorkerTypeFromWorkerData() ?? (process.env.REPOMIX_WORKER_TYPE as WorkerType | undefined); + + // In bundled environments, Tinypool may reuse child processes. + // If the task doesn't match the initially configured worker type, infer from task. + const inferredType = inferWorkerTypeFromTask(task); + + // Use inferred type if available (more reliable in bundled env) + if (inferredType) { + workerType = inferredType; + } + + if (!workerType) { + throw new Error('Cannot determine worker type from workerData, env, or task structure'); + } + + // Load handler (cached) + const { handler } = await loadWorkerHandler(workerType); + return handler(task); +}; + +/** + * Cleanup function for Tinypool teardown. + * Cleans up all cached handlers. 
+ */ +export const onWorkerTermination = async (): Promise => { + for (const { cleanup } of handlerCache.values()) { + if (cleanup) { + await cleanup(); + } + } + handlerCache.clear(); +}; diff --git a/tests/cli/actions/defaultAction.test.ts b/tests/cli/actions/defaultAction.test.ts index c2858b9fc..4d5852a98 100644 --- a/tests/cli/actions/defaultAction.test.ts +++ b/tests/cli/actions/defaultAction.test.ts @@ -149,7 +149,7 @@ describe('defaultAction', () => { expect(processConcurrency.initTaskRunner).toHaveBeenCalledWith({ numOfTasks: 1, - workerPath: expect.stringContaining('defaultActionWorker.js'), + workerType: 'defaultAction', runtime: 'child_process', }); diff --git a/tests/core/metrics/calculateGitDiffMetrics.test.ts b/tests/core/metrics/calculateGitDiffMetrics.test.ts index a84a8b9b3..adcdf5d75 100644 --- a/tests/core/metrics/calculateGitDiffMetrics.test.ts +++ b/tests/core/metrics/calculateGitDiffMetrics.test.ts @@ -67,7 +67,11 @@ describe('calculateGitDiffMetrics', () => { cwd: '/test/project', }; - const mockTaskRunner = mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }); + const mockTaskRunner = mockInitTaskRunner({ + numOfTasks: 1, + workerType: 'calculateMetrics', + runtime: 'worker_threads', + }); beforeEach(() => { vi.clearAllMocks(); diff --git a/tests/core/metrics/calculateGitLogMetrics.test.ts b/tests/core/metrics/calculateGitLogMetrics.test.ts index 86f234391..1c53b90b7 100644 --- a/tests/core/metrics/calculateGitLogMetrics.test.ts +++ b/tests/core/metrics/calculateGitLogMetrics.test.ts @@ -67,7 +67,11 @@ describe('calculateGitLogMetrics', () => { cwd: '/test/project', }; - const mockTaskRunner = mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }); + const mockTaskRunner = mockInitTaskRunner({ + numOfTasks: 1, + workerType: 'calculateMetrics', + runtime: 'worker_threads', + }); beforeEach(() => { vi.clearAllMocks(); diff --git a/tests/core/metrics/calculateOutputMetrics.test.ts 
b/tests/core/metrics/calculateOutputMetrics.test.ts index bc2f900cc..104914ff7 100644 --- a/tests/core/metrics/calculateOutputMetrics.test.ts +++ b/tests/core/metrics/calculateOutputMetrics.test.ts @@ -24,7 +24,7 @@ describe('calculateOutputMetrics', () => { const path = 'test.txt'; const result = await calculateOutputMetrics(content, encoding, path, { - taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }); expect(result).toBe(2); // 'test content' should be counted as 2 tokens @@ -35,7 +35,7 @@ describe('calculateOutputMetrics', () => { const encoding = 'o200k_base'; const result = await calculateOutputMetrics(content, encoding, undefined, { - taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }); expect(result).toBe(2); @@ -59,7 +59,7 @@ describe('calculateOutputMetrics', () => { await expect( calculateOutputMetrics(content, encoding, undefined, { - taskRunner: mockErrorTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockErrorTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }), ).rejects.toThrow('Worker error'); @@ -71,7 +71,7 @@ describe('calculateOutputMetrics', () => { const encoding = 'o200k_base'; const result = await calculateOutputMetrics(content, encoding, undefined, { - taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }); expect(result).toBe(0); @@ -82,7 +82,7 @@ describe('calculateOutputMetrics', () => { const encoding = 'o200k_base'; const result = await calculateOutputMetrics(content, encoding, undefined, { - 
taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }); expect(result).toBeGreaterThan(0); @@ -110,7 +110,7 @@ describe('calculateOutputMetrics', () => { }; const result = await calculateOutputMetrics(content, encoding, path, { - taskRunner: mockParallelTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockParallelTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }); expect(chunksProcessed).toBeGreaterThan(1); // Should have processed multiple chunks @@ -135,7 +135,7 @@ describe('calculateOutputMetrics', () => { await expect( calculateOutputMetrics(content, encoding, undefined, { - taskRunner: mockErrorTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockErrorTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }), ).rejects.toThrow('Parallel processing error'); @@ -161,7 +161,11 @@ describe('calculateOutputMetrics', () => { }; await calculateOutputMetrics(content, encoding, undefined, { - taskRunner: mockChunkTrackingTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockChunkTrackingTaskRunner({ + numOfTasks: 1, + workerType: 'calculateMetrics', + runtime: 'worker_threads', + }), }); // Check that chunks are roughly equal in size diff --git a/tests/core/metrics/calculateSelectiveFileMetrics.test.ts b/tests/core/metrics/calculateSelectiveFileMetrics.test.ts index 8a0360949..2e89b4161 100644 --- a/tests/core/metrics/calculateSelectiveFileMetrics.test.ts +++ b/tests/core/metrics/calculateSelectiveFileMetrics.test.ts @@ -36,7 +36,7 @@ describe('calculateSelectiveFileMetrics', () => { 'o200k_base', progressCallback, { - taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: 
mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }, ); @@ -57,7 +57,7 @@ describe('calculateSelectiveFileMetrics', () => { 'o200k_base', progressCallback, { - taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerPath: '', runtime: 'worker_threads' }), + taskRunner: mockInitTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }), }, ); diff --git a/tests/shared/processConcurrency.test.ts b/tests/shared/processConcurrency.test.ts index 983da939d..ee90c7400 100644 --- a/tests/shared/processConcurrency.test.ts +++ b/tests/shared/processConcurrency.test.ts @@ -72,17 +72,17 @@ describe('processConcurrency', () => { }); it('should initialize Tinypool with correct configuration', () => { - const workerPath = '/path/to/worker.js'; - const tinypool = createWorkerPool({ numOfTasks: 500, workerPath, runtime: 'child_process' }); + const tinypool = createWorkerPool({ numOfTasks: 500, workerType: 'fileCollect', runtime: 'child_process' }); expect(Tinypool).toHaveBeenCalledWith({ - filename: workerPath, + filename: expect.stringContaining('fileCollectWorker.js'), runtime: 'child_process', minThreads: 1, maxThreads: 4, // Math.min(4, 500/100) = 4 idleTimeout: 5000, teardown: 'onWorkerTermination', workerData: { + workerType: 'fileCollect', logLevel: 2, }, env: expect.objectContaining({ @@ -95,17 +95,17 @@ describe('processConcurrency', () => { }); it('should initialize Tinypool with worker_threads runtime when specified', () => { - const workerPath = '/path/to/worker.js'; - const tinypool = createWorkerPool({ numOfTasks: 500, workerPath, runtime: 'worker_threads' }); + const tinypool = createWorkerPool({ numOfTasks: 500, workerType: 'securityCheck', runtime: 'worker_threads' }); expect(Tinypool).toHaveBeenCalledWith({ - filename: workerPath, + filename: expect.stringContaining('securityCheckWorker.js'), runtime: 'worker_threads', minThreads: 1, maxThreads: 4, // Math.min(4, 500/100) = 4 idleTimeout: 
5000, teardown: 'onWorkerTermination', workerData: { + workerType: 'securityCheck', logLevel: 2, }, }); @@ -126,8 +126,7 @@ describe('processConcurrency', () => { }); it('should return a TaskRunner with run and cleanup methods', () => { - const workerPath = '/path/to/worker.js'; - const taskRunner = initTaskRunner({ numOfTasks: 100, workerPath, runtime: 'child_process' }); + const taskRunner = initTaskRunner({ numOfTasks: 100, workerType: 'fileProcess', runtime: 'child_process' }); expect(taskRunner).toHaveProperty('run'); expect(taskRunner).toHaveProperty('cleanup'); @@ -136,12 +135,14 @@ describe('processConcurrency', () => { }); it('should pass runtime parameter to createWorkerPool', () => { - const workerPath = '/path/to/worker.js'; - const taskRunner = initTaskRunner({ numOfTasks: 100, workerPath, runtime: 'worker_threads' }); + const taskRunner = initTaskRunner({ numOfTasks: 100, workerType: 'calculateMetrics', runtime: 'worker_threads' }); expect(Tinypool).toHaveBeenCalledWith( expect.objectContaining({ runtime: 'worker_threads', + workerData: expect.objectContaining({ + workerType: 'calculateMetrics', + }), }), ); expect(taskRunner).toHaveProperty('run'); diff --git a/tests/shared/unifiedWorker.test.ts b/tests/shared/unifiedWorker.test.ts new file mode 100644 index 000000000..ee1823adb --- /dev/null +++ b/tests/shared/unifiedWorker.test.ts @@ -0,0 +1,171 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +// We need to test the internal functions, so we'll test through the module behavior +// Mock all worker modules +vi.mock('../../src/core/file/workers/fileCollectWorker.js', () => ({ + default: vi.fn().mockResolvedValue({ collected: true }), + onWorkerTermination: vi.fn(), +})); +vi.mock('../../src/core/file/workers/fileProcessWorker.js', () => ({ + default: vi.fn().mockResolvedValue({ processed: true }), + onWorkerTermination: vi.fn(), +})); +vi.mock('../../src/core/security/workers/securityCheckWorker.js', () => ({ + default: 
vi.fn().mockResolvedValue(null), + onWorkerTermination: vi.fn(), +})); +vi.mock('../../src/core/metrics/workers/calculateMetricsWorker.js', () => ({ + default: vi.fn().mockResolvedValue(100), + onWorkerTermination: vi.fn(), +})); +vi.mock('../../src/cli/actions/workers/defaultActionWorker.js', () => ({ + default: vi.fn().mockResolvedValue({ packResult: {}, config: {} }), + onWorkerTermination: vi.fn(), +})); + +// Mock worker_threads +vi.mock('node:worker_threads', () => ({ + workerData: undefined, +})); + +describe('unifiedWorker', () => { + beforeEach(() => { + vi.clearAllMocks(); + // Reset module cache to clear handler cache + vi.resetModules(); + }); + + describe('inferWorkerTypeFromTask', () => { + it('should infer defaultAction from task with directories, cwd, config', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + const task = { + directories: ['.'], + cwd: '/test', + config: {}, + cliOptions: {}, + }; + + await handler(task); + + const defaultActionWorker = await import('../../src/cli/actions/workers/defaultActionWorker.js'); + expect(defaultActionWorker.default).toHaveBeenCalledWith(task); + }); + + it('should infer defaultAction from ping task', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + const task = { ping: true }; + + await handler(task); + + const defaultActionWorker = await import('../../src/cli/actions/workers/defaultActionWorker.js'); + expect(defaultActionWorker.default).toHaveBeenCalledWith(task); + }); + + it('should infer fileCollect from task with filePath, rootDir, maxFileSize', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + const task = { + filePath: 'test.ts', + rootDir: '/root', + maxFileSize: 1000, + }; + + await handler(task); + + const fileCollectWorker = await import('../../src/core/file/workers/fileCollectWorker.js'); + expect(fileCollectWorker.default).toHaveBeenCalledWith(task); 
+ }); + + it('should infer fileProcess from task with rawFile and config', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + const task = { + rawFile: { path: 'test.ts', content: 'code' }, + config: {}, + }; + + await handler(task); + + const fileProcessWorker = await import('../../src/core/file/workers/fileProcessWorker.js'); + expect(fileProcessWorker.default).toHaveBeenCalledWith(task); + }); + + it('should infer calculateMetrics from task with content and encoding', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + const task = { + content: 'test content', + encoding: 'cl100k_base', + }; + + await handler(task); + + const calculateMetricsWorker = await import('../../src/core/metrics/workers/calculateMetricsWorker.js'); + expect(calculateMetricsWorker.default).toHaveBeenCalledWith(task); + }); + + it('should infer securityCheck from task with filePath, content, type', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + const task = { + filePath: 'test.ts', + content: 'test content', + type: 'file', + }; + + await handler(task); + + const securityCheckWorker = await import('../../src/core/security/workers/securityCheckWorker.js'); + expect(securityCheckWorker.default).toHaveBeenCalledWith(task); + }); + + it('should throw error for unrecognizable task structure', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + const task = { unknownField: 'value' }; + + await expect(handler(task)).rejects.toThrow('Cannot determine worker type'); + }); + + it('should throw error for null task', async () => { + const { default: handler } = await import('../../src/shared/unifiedWorker.js'); + + await expect(handler(null)).rejects.toThrow('Cannot determine worker type'); + }); + + it('should throw error for non-object task', async () => { + const { default: handler } = await 
import('../../src/shared/unifiedWorker.js'); + + await expect(handler('string')).rejects.toThrow('Cannot determine worker type'); + }); + }); + + describe('onWorkerTermination', () => { + it('should call cleanup on cached handlers', async () => { + // First, load a handler to populate the cache + const { default: handler, onWorkerTermination } = await import('../../src/shared/unifiedWorker.js'); + const task = { ping: true }; + + await handler(task); + + // Now call termination + await onWorkerTermination(); + + const defaultActionWorker = await import('../../src/cli/actions/workers/defaultActionWorker.js'); + expect(defaultActionWorker.onWorkerTermination).toHaveBeenCalled(); + }); + + it('should clear handler cache after cleanup', async () => { + const { default: handler, onWorkerTermination } = await import('../../src/shared/unifiedWorker.js'); + + // Load handler + await handler({ ping: true }); + + // Terminate + await onWorkerTermination(); + + // Load again - should call the module import again + vi.clearAllMocks(); + await handler({ ping: true }); + + const defaultActionWorker = await import('../../src/cli/actions/workers/defaultActionWorker.js'); + // The handler should be called again (cache was cleared) + expect(defaultActionWorker.default).toHaveBeenCalled(); + }); + }); +}); diff --git a/website/compose.yml b/website/compose.yml index 9ba4d4408..91941dbaf 100644 --- a/website/compose.yml +++ b/website/compose.yml @@ -26,6 +26,9 @@ services: environment: - NODE_ENV=development - PORT=8080 + # Unset bundled mode for local development (volume mount overwrites bundled files) + - REPOMIX_WORKER_PATH= + - REPOMIX_WASM_DIR= # override default command command: sh -c "npm i && npm run dev" develop: diff --git a/website/server/Dockerfile b/website/server/Dockerfile index baf494be9..2cc2d33aa 100644 --- a/website/server/Dockerfile +++ b/website/server/Dockerfile @@ -1,5 +1,5 @@ # ============================================================================== -# 
Base image +# Build stage # ============================================================================== FROM node:24-alpine AS builder @@ -12,26 +12,14 @@ COPY package*.json ./ # Install all dependencies (including dev dependencies for build) RUN npm ci -# Copy source code +# Copy source code and build COPY . . - -# Build TypeScript RUN npm run build -# ============================================================================== -# Production dependencies -# ============================================================================== -FROM node:24-alpine AS deps - -# Install git (required for GitHub-based npm dependencies) -RUN apk add --no-cache git - -WORKDIR /app -COPY package*.json ./ - -# Install only production dependencies -RUN npm ci --omit=dev --ignore-scripts && \ - npm cache clean --force +# Bundle the server with esbuild for unified worker support +RUN npx esbuild dist/index.js --bundle --platform=node --target=node20 \ + --format=esm --outfile=dist-bundled/server.mjs --external:tinypool \ + --banner:js="import { createRequire as _createRequire } from 'module'; const require = _createRequire(import.meta.url); import { fileURLToPath as _fileURLToPath } from 'url'; import { dirname as _dirname } from 'path'; const __filename = _fileURLToPath(import.meta.url); const __dirname = _dirname(__filename);" # ============================================================================== # Runtime image @@ -43,18 +31,23 @@ RUN apk add --no-cache git ca-certificates WORKDIR /app -# Copy built application -COPY --from=builder /app/dist ./dist +# Copy bundled server +COPY --from=builder /app/dist-bundled ./dist-bundled + +# Copy WASM files for tree-sitter (may be hoisted to root node_modules) +COPY --from=builder /app/node_modules/@repomix/tree-sitter-wasms/out/*.wasm ./wasm/ -# Copy production dependencies -COPY --from=deps /app/node_modules ./node_modules +# Copy tinypool (external dependency) +COPY --from=builder /app/node_modules/tinypool 
./node_modules/tinypool -# Set environment variables +# Set environment variables for bundled mode ENV NODE_ENV=production \ - PORT=8080 + PORT=8080 \ + REPOMIX_WORKER_PATH=/app/dist-bundled/server.mjs \ + REPOMIX_WASM_DIR=/app/wasm # Expose port EXPOSE 8080 -# Start the server directly -CMD ["node", "dist/index.js"] +# Start the bundled server +CMD ["node", "dist-bundled/server.mjs"] diff --git a/website/server/package-lock.json b/website/server/package-lock.json index 8fc2dea85..903ea5e02 100644 --- a/website/server/package-lock.json +++ b/website/server/package-lock.json @@ -3594,7 +3594,7 @@ }, "node_modules/repomix": { "version": "1.11.0", - "resolved": "git+ssh://git@github.com/yamadashy/repomix.git#5b02cb56ed606f283e2fd118e07e69c75ee99d8b", + "resolved": "git+ssh://git@github.com/yamadashy/repomix.git#3c4c19236859def5d4b7c10091c4ab696caa5b73", "license": "MIT", "dependencies": { "@clack/prompts": "^0.11.0", diff --git a/website/server/src/index.ts b/website/server/src/index.ts index a1b6e3113..670a2306a 100644 --- a/website/server/src/index.ts +++ b/website/server/src/index.ts @@ -10,51 +10,63 @@ import { rateLimitMiddleware } from './middlewares/rateLimit.js'; import { logInfo, logMemoryUsage } from './utils/logger.js'; import { getProcessConcurrency } from './utils/processConcurrency.js'; -const API_TIMEOUT_MS = 35_000; +// Re-export unified worker for bundled environment +// When this file is used as a Tinypool worker, it needs to export the handler +export { unifiedWorkerHandler as default, unifiedWorkerTermination as onWorkerTermination } from 'repomix'; -// Log server metrics on startup -logInfo('Server starting', { - metrics: { - processConcurrency: getProcessConcurrency(), - }, -}); +// Check if running as a Tinypool worker (bundled environment) +// In bundled mode, this file is used both as server entry and worker entry +const isTinypoolWorker = (): boolean => { + const tinypoolState = (process as NodeJS.Process & { __tinypool_state__?: { 
isTinypoolWorker?: boolean } }) + .__tinypool_state__; + return tinypoolState?.isTinypoolWorker ?? false; +}; + +// Skip server initialization if running as a Tinypool worker +if (!isTinypoolWorker()) { + const API_TIMEOUT_MS = 35_000; -// Log initial memory usage -logMemoryUsage('Server startup', { - processConcurrency: getProcessConcurrency(), -}); + // Log server metrics on startup + logInfo('Server starting', { + metrics: { + processConcurrency: getProcessConcurrency(), + }, + }); -const app = new Hono(); + // Log initial memory usage + logMemoryUsage('Server startup', { + processConcurrency: getProcessConcurrency(), + }); -// Configure CORS -app.use('/*', corsMiddleware); + const app = new Hono(); -// Enable compression -app.use(compress()); + // Configure CORS + app.use('/*', corsMiddleware); -// Set timeout for API routes -app.use('/api', timeout(API_TIMEOUT_MS)); + // Enable compression + app.use(compress()); -// Setup custom logger -app.use('*', cloudLoggerMiddleware()); + // Set timeout for API routes + app.use('/api', timeout(API_TIMEOUT_MS)); -// Apply rate limiting to API routes -app.use('/api/*', rateLimitMiddleware()); + // Setup custom logger + app.use('*', cloudLoggerMiddleware()); -// Health check endpoint -app.get('/health', (c) => c.text('OK')); + // Apply rate limiting to API routes + app.use('/api/*', rateLimitMiddleware()); -// Main packing endpoint -app.post('/api/pack', bodyLimitMiddleware, packAction); + // Health check endpoint + app.get('/health', (c) => c.text('OK')); -// Start server -const port = process.env.PORT ? Number.parseInt(process.env.PORT, 10) : 3000; -logInfo(`Server starting on port ${port}`); + // Main packing endpoint + app.post('/api/pack', bodyLimitMiddleware, packAction); -serve({ - fetch: app.fetch, - port, -}); + // Start server + const port = process.env.PORT ? 
Number.parseInt(process.env.PORT, 10) : 3000; + logInfo(`Server starting on port ${port}`); -// Export app for testing -export default app; + serve({ + fetch: app.fetch, + port, + }); +}