diff --git a/gitnexus/src/core/embeddings/embedder.ts b/gitnexus/src/core/embeddings/embedder.ts index d873f3a0a3..3ec5f08e16 100644 --- a/gitnexus/src/core/embeddings/embedder.ts +++ b/gitnexus/src/core/embeddings/embedder.ts @@ -28,6 +28,7 @@ import { isHttpMode, getHttpDimensions, httpEmbed } from './http-client.js'; import { resolveEmbeddingConfig } from './config.js'; import { applyHfEnvOverrides, isHfDownloadFailure, withHfDownloadRetry } from './hf-env.js'; import { getLocalEmbeddingRuntimeBlocker } from './runtime-support.js'; +import { ensureOnnxRuntimeCommonResolvable } from './onnxruntime-common-resolver.js'; import { logger } from '../logger.js'; /** @@ -179,6 +180,9 @@ export const initEmbedder = async ( try { // Lazy-load transformers.js only after the runtime guard has passed, so // unsupported platforms never reach the native ONNX import (#1515). + // Under pnpm-strict / `pnpm dlx`, transformers' phantom `onnxruntime-common` + // import is unresolvable; register the fallback resolver first (#307). + ensureOnnxRuntimeCommonResolvable(); const { pipeline, env } = await import('@huggingface/transformers'); // Configure transformers.js environment diff --git a/gitnexus/src/core/embeddings/onnxruntime-common-resolver.ts b/gitnexus/src/core/embeddings/onnxruntime-common-resolver.ts new file mode 100644 index 0000000000..fbb4f40823 --- /dev/null +++ b/gitnexus/src/core/embeddings/onnxruntime-common-resolver.ts @@ -0,0 +1,133 @@ +/** + * Make `@huggingface/transformers`' phantom `onnxruntime-common` import + * resolvable under strict package-manager layouts (#307, #2069). + * + * ## Why + * transformers' shipped `dist/transformers.node.mjs` does a bare + * `import 'onnxruntime-common'`, but transformers' `package.json` never declares + * onnxruntime-common (it lists onnxruntime-node / onnxruntime-web / sharp). 
With + * npm's flat `node_modules` — or pnpm with hoisting — the package is hoisted to + * a directory on transformers' resolution path and the import resolves by + * accident. Under pnpm's isolated store (and therefore `pnpm dlx` / `pnpx`), a + * package only sees its *declared* deps, so the import dies with + * `ERR_MODULE_NOT_FOUND` before `analyze --embeddings` can run. + * + * Declaring onnxruntime-common in gitnexus' own dependencies (#2074) does NOT + * fix this under pnpm: Node resolves the bare specifier from *transformers'* + * module scope, not ours, and overrides/resolutions can only re-version an + * existing edge, never add the missing one. + * + * ## What this does + * Install a synchronous, in-thread ESM resolution hook (`module.registerHooks`, + * Node >= 22.15) that redirects `onnxruntime-common` to a copy gitnexus can + * resolve — but only when the default resolver fails. The redirect target is + * preferentially the `onnxruntime-common` that `onnxruntime-node` (the native + * binding transformers actually loads) itself depends on, so the redirected copy + * is version-matched to that binding even under `pnpm dlx` — where gitnexus' + * npm-style `overrides` block does NOT apply, because it is honoured only from a + * root manifest and gitnexus is a transitive dependency there. It falls back to + * gitnexus' own direct `onnxruntime-common` dependency when that chain can't be + * walked. onnxruntime-common is a stable, pure-JS package whose `Tensor` surface + * is unchanged across 1.24–1.26, so either target is API-compatible. On working + * layouts the default resolver succeeds first and the hook never fires, so + * behaviour is unchanged. + * + * `registerHooks` (synchronous, in-thread) is preferred over the older + * `module.register` (async, off-thread, now deprecated — DEP0205, removed in + * Node 26): the redirect is a one-line conditional that needs no worker thread, + * no separate hook module, and no `data` marshalling. 
+ *
+ * ## Safety
+ * Best-effort and idempotent. The hook is installed lazily, only on the
+ * local-embedding code path (after parsing), so it is never registered during
+ * analysis, in the parse workers, or in HTTP embedding mode. Once installed it
+ * is process-global: its resolve closure runs for every subsequent module
+ * resolution, but it passes all of them through untouched and only substitutes a
+ * result for the exact `onnxruntime-common` specifier when that specifier is
+ * genuinely absent — so it cannot mask an unrelated resolution error, and the
+ * per-resolution cost is a single string comparison.
+ *
+ * `module.registerHooks` is marked `@experimental` and requires Node >= 22.15
+ * (the gitnexus engines floor is >= 22.0.0). It is read off a namespace import
+ * because a named import of an export the builtin lacks fails at link time, so
+ * on older runtimes this module still loads and is a graceful no-op — fine on
+ * hoisted layouts. Any failure during installation is swallowed.
+ */
+import * as nodeModule from 'node:module';
+import { pathToFileURL } from 'node:url';
+import { logger } from '../logger.js';
+
+let attempted = false;
+
+/**
+ * Compute the file: URL the hook redirects `onnxruntime-common` to.
+ *
+ * Prefer the copy `onnxruntime-node` (the native binding transformers loads)
+ * depends on, so the redirected module is version-matched to the binding even
+ * under `pnpm dlx`, where transformers keeps its own pinned onnxruntime-node.
+ * The walk resolves transformers' MAIN entry — NOT `@huggingface/transformers/
+ * package.json`, which transformers' `exports` map blocks
+ * (`ERR_PACKAGE_PATH_NOT_EXPORTED`) — then onnxruntime-node, then its
+ * onnxruntime-common. Falls back to gitnexus' own direct dependency (always
+ * resolvable from our scope) when any step fails.
+ */
+const resolveOnnxRuntimeCommonUrl = (): string => {
+  const require = nodeModule.createRequire(import.meta.url);
+  try {
+    const entry = require.resolve('@huggingface/transformers');
+    const ortNodePkg = nodeModule.createRequire(entry).resolve('onnxruntime-node/package.json');
+    const common = nodeModule.createRequire(ortNodePkg).resolve('onnxruntime-common');
+    return pathToFileURL(common).href;
+  } catch {
+    return pathToFileURL(require.resolve('onnxruntime-common')).href;
+  }
+};
+
+/**
+ * Idempotently install the onnxruntime-common resolution fallback. Call right
+ * before the dynamic `import('@huggingface/transformers')` on the local-
+ * embedding path. Never throws; a no-op when `registerHooks` is unavailable.
+ */
+export const ensureOnnxRuntimeCommonResolvable = (): void => {
+  if (attempted) return;
+  // Mark attempted up-front: a failed attempt must not retry on every
+  // initEmbedder() call, and the hook is process-global — once is enough.
+  attempted = true;
+
+  try {
+    // Node < 22.15: no synchronous hooks API, so the namespace property is
+    // undefined. Degrade gracefully — the import still works on hoisted layouts.
+    if (typeof nodeModule.registerHooks !== 'function') return;
+
+    const redirectUrl = resolveOnnxRuntimeCommonUrl();
+
+    nodeModule.registerHooks({
+      resolve(specifier, context, nextResolve) {
+        if (specifier !== 'onnxruntime-common') return nextResolve(specifier, context);
+        // Honour a real, package-manager-provided copy when one is on the path
+        // (npm / hoisted pnpm); only substitute ours when the specifier is
+        // genuinely absent.
+        try {
+          return nextResolve(specifier, context);
+        } catch (err) {
+          // The phantom import surfaces as ERR_MODULE_NOT_FOUND (or, for a
+          // present-but-exports-broken copy, ERR_PACKAGE_PATH_NOT_EXPORTED).
+          // Rethrow anything else so a genuinely broken install is not masked.
+          const code = (err as { code?: string } | null | undefined)?.code;
+          if (code === 'ERR_MODULE_NOT_FOUND' || code === 'ERR_PACKAGE_PATH_NOT_EXPORTED') {
+            return { url: redirectUrl, shortCircuit: true };
+          }
+          throw err;
+        }
+      },
+    });
+    logger.debug({ redirectUrl }, 'Installed onnxruntime-common resolution fallback (#307)');
+  } catch (err) {
+    // Never block embeddings on the fallback. On layouts where the package
+    // manager already resolves onnxruntime-common this is unnecessary anyway.
+    logger.debug(
+      { err: err instanceof Error ? err.message : String(err) },
+      'onnxruntime-common resolution fallback not installed',
+    );
+  }
+};
diff --git a/gitnexus/src/mcp/core/embedder.ts b/gitnexus/src/mcp/core/embedder.ts
index 451d12c1f8..4dd73f317a 100644
--- a/gitnexus/src/mcp/core/embedder.ts
+++ b/gitnexus/src/mcp/core/embedder.ts
@@ -22,6 +22,7 @@ import {
   withHfDownloadRetry,
 } from '../../core/embeddings/hf-env.js';
 import { getLocalEmbeddingRuntimeBlocker } from '../../core/embeddings/runtime-support.js';
+import { ensureOnnxRuntimeCommonResolvable } from '../../core/embeddings/onnxruntime-common-resolver.js';
 import { silenceStdout, restoreStdout, realStderrWrite } from '../../core/lbug/pool-adapter.js';
 import { logger } from '../../core/logger.js';
 
@@ -65,6 +66,9 @@ export const initEmbedder = async (): Promise => {
   try {
     // Lazy-load transformers.js only after the runtime guard has passed, so
     // unsupported platforms never reach the native ONNX import (#1515).
+    // Under pnpm-strict / `pnpm dlx`, transformers' phantom `onnxruntime-common`
+    // import is unresolvable; register the fallback resolver first (#307).
+    ensureOnnxRuntimeCommonResolvable();
     const { pipeline, env } = await import('@huggingface/transformers');
 
     env.allowLocalModels = false;
diff --git a/gitnexus/test/unit/onnxruntime-common-resolver.test.ts b/gitnexus/test/unit/onnxruntime-common-resolver.test.ts
new file mode 100644
index 0000000000..801cb001ee
--- /dev/null
+++ b/gitnexus/test/unit/onnxruntime-common-resolver.test.ts
@@ -0,0 +1,132 @@
+import { describe, it, expect, vi, afterEach } from 'vitest';
+
+/**
+ * Tests for the #307 pnpm-strict / `pnpm dlx` fix: a synchronous in-thread ESM
+ * resolution hook (`module.registerHooks`) that redirects @huggingface/
+ * transformers' phantom `onnxruntime-common` import to a copy gitnexus can resolve.
+ *
+ * Each test mocks `node:module` with a chosen `registerHooks` (a spy, or
+ * `undefined` to simulate Node < 22.15) so we can assert one-shot installation,
+ * graceful degradation, and the redirect/passthrough/rethrow logic of the
+ * resolve closure — without mutating the real process loader.
+ */
+
+const RESOLVER = '../../src/core/embeddings/onnxruntime-common-resolver.js';
+
+/**
+ * (Re)load the resolver with a chosen `registerHooks` mocked into node:module;
+ * every other export is spread from the original. `vi.resetModules()` + a fresh
+ * `import()` re-initialise the one-shot guard, so tests share no resolver state.
+ */
+async function loadResolver(registerHooks: unknown) {
+  vi.resetModules();
+  vi.doMock('node:module', async (importOriginal) => {
+    const orig = await importOriginal();
+    return { ...orig, registerHooks };
+  });
+  return import(RESOLVER);
+}
+
+const ctx = { conditions: [], importAttributes: {} } as never;
+/** Build an Error carrying the `ERR_MODULE_NOT_FOUND` code the resolve hook redirects on. */
+const moduleNotFound = (): Error => {
+  const e = new Error("Cannot find package 'onnxruntime-common'") as Error & { code: string };
+  e.code = 'ERR_MODULE_NOT_FOUND';
+  return e;
+};
+
+afterEach(() => {
+  vi.doUnmock('node:module');
+});
+
+describe('ensureOnnxRuntimeCommonResolvable — installation', () => {
+  it('installs the resolve hook exactly once (idempotent)', async () => {
+    const spy = vi.fn();
+    const mod = await loadResolver(spy);
+
+    mod.ensureOnnxRuntimeCommonResolvable();
+    mod.ensureOnnxRuntimeCommonResolvable(); // second call is a no-op
+
+    expect(spy).toHaveBeenCalledTimes(1);
+    expect(typeof spy.mock.calls[0][0].resolve).toBe('function');
+  });
+
+  it('no-ops gracefully when registerHooks is unavailable (Node < 22.15)', async () => {
+    const mod = await loadResolver(undefined);
+    // NOTE(review): mocks a present-but-undefined export; real Node < 22.15 lacks it entirely.
+    expect(() => mod.ensureOnnxRuntimeCommonResolvable()).not.toThrow();
+  });
+
+  it('is best-effort: swallows a registerHooks() failure instead of throwing into the embedder', async () => {
+    const mod = await loadResolver(
+      vi.fn(() => {
+        throw new Error('hook-install-failed');
+      }),
+    );
+    // The call site (initEmbedder) does not guard the return; a throw here would
+    // break `analyze --embeddings`. The outer try/catch must absorb it.
+    expect(() => mod.ensureOnnxRuntimeCommonResolvable()).not.toThrow();
+  });
+});
+
+describe('ensureOnnxRuntimeCommonResolvable — resolve hook behaviour', () => {
+  /** Install the fallback and return the resolve closure handed to registerHooks. */
+  async function captureResolve() {
+    const spy = vi.fn();
+    const mod = await loadResolver(spy);
+    mod.ensureOnnxRuntimeCommonResolvable();
+    return spy.mock.calls[0][0].resolve as (
+      s: string,
+      c: never,
+      n: (s: string, c: never) => unknown,
+    ) => unknown;
+  }
+
+  it('passes a successful default resolution through unchanged (no-op on hoisted layouts)', async () => {
+    const resolve = await captureResolve();
+    const real = { url: 'file:///real/onnxruntime-common/index.js', shortCircuit: true };
+    const next = vi.fn(() => real);
+
+    const res = resolve('onnxruntime-common', ctx, next);
+
+    expect(next).toHaveBeenCalledTimes(1);
+    expect(res).toBe(real); // the real resolution, NOT a redirect
+  });
+
+  it('redirects onnxruntime-common to the gitnexus copy when default resolution fails', async () => {
+    const resolve = await captureResolve();
+    const next = vi.fn(() => {
+      throw moduleNotFound();
+    });
+
+    const res = resolve('onnxruntime-common', ctx, next) as { url: string; shortCircuit: boolean };
+
+    expect(res.shortCircuit).toBe(true);
+    // require.resolve runs for real here: expect an actual node_modules entry-file URL.
+    expect(res.url).toMatch(/^file:\/\/.*\/node_modules\/onnxruntime-common\/.*\.js$/);
+  });
+
+  it('never masks an unrelated resolution failure (other specifiers rethrow)', async () => {
+    const resolve = await captureResolve();
+    const err = moduleNotFound();
+    const next = vi.fn(() => {
+      throw err;
+    });
+
+    expect(() => resolve('some-other-package', ctx, next)).toThrow(err);
+  });
+
+  it('rethrows when onnxruntime-common fails for a non-absence reason', async () => {
+    const resolve = await captureResolve();
+    // A present-but-otherwise-broken resolution (not a missing package) must
+    // surface, not be silently papered over with gitnexus' copy.
+    const err = Object.assign(new Error('bad specifier'), {
+      code: 'ERR_INVALID_MODULE_SPECIFIER',
+    });
+    const next = vi.fn(() => {
+      throw err;
+    });
+
+    expect(() => resolve('onnxruntime-common', ctx, next)).toThrow(err);
+  });
+});