Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions gitnexus/src/core/embeddings/embedder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import { isHttpMode, getHttpDimensions, httpEmbed } from './http-client.js';
import { resolveEmbeddingConfig } from './config.js';
import { applyHfEnvOverrides, isHfDownloadFailure, withHfDownloadRetry } from './hf-env.js';
import { getLocalEmbeddingRuntimeBlocker } from './runtime-support.js';
import { ensureOnnxRuntimeCommonResolvable } from './onnxruntime-common-resolver.js';
import { logger } from '../logger.js';

/**
Expand Down Expand Up @@ -179,6 +180,9 @@ export const initEmbedder = async (
try {
// Lazy-load transformers.js only after the runtime guard has passed, so
// unsupported platforms never reach the native ONNX import (#1515).
// Under pnpm-strict / `pnpm dlx`, transformers' phantom `onnxruntime-common`
// import is unresolvable; register the fallback resolver first (#307).
ensureOnnxRuntimeCommonResolvable();
const { pipeline, env } = await import('@huggingface/transformers');

// Configure transformers.js environment
Expand Down
133 changes: 133 additions & 0 deletions gitnexus/src/core/embeddings/onnxruntime-common-resolver.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/**
* Make `@huggingface/transformers`' phantom `onnxruntime-common` import
* resolvable under strict package-manager layouts (#307, #2069).
*
* ## Why
* transformers' shipped `dist/transformers.node.mjs` does a bare
* `import 'onnxruntime-common'`, but transformers' `package.json` never declares
* onnxruntime-common (it lists onnxruntime-node / onnxruntime-web / sharp). With
* npm's flat `node_modules` — or pnpm with hoisting — the package is hoisted to
* a directory on transformers' resolution path and the import resolves by
* accident. Under pnpm's isolated store (and therefore `pnpm dlx` / `pnpx`), a
* package only sees its *declared* deps, so the import dies with
* `ERR_MODULE_NOT_FOUND` before `analyze --embeddings` can run.
*
* Declaring onnxruntime-common in gitnexus' own dependencies (#2074) does NOT
* fix this under pnpm: Node resolves the bare specifier from *transformers'*
* module scope, not ours, and overrides/resolutions can only re-version an
* existing edge, never add the missing one.
*
* ## What this does
* Install a synchronous, in-thread ESM resolution hook (`module.registerHooks`,
* Node >= 22.15) that redirects `onnxruntime-common` to a copy gitnexus can
* resolve — but only when the default resolver fails. The redirect target is
* preferentially the `onnxruntime-common` that `onnxruntime-node` (the native
* binding transformers actually loads) itself depends on, so the redirected copy
* is version-matched to that binding even under `pnpm dlx` — where gitnexus'
* npm-style `overrides` block does NOT apply, because it is honoured only from a
* root manifest and gitnexus is a transitive dependency there. It falls back to
* gitnexus' own direct `onnxruntime-common` dependency when that chain can't be
* walked. onnxruntime-common is a stable, pure-JS package whose `Tensor` surface
* is unchanged across 1.24–1.26, so either target is API-compatible. On working
* layouts the default resolver succeeds first and the hook never fires, so
* behaviour is unchanged.
*
* `registerHooks` (synchronous, in-thread) is preferred over the older
* `module.register` (async, off-thread, now deprecated — DEP0205, removed in
* Node 26): the redirect is a one-line conditional that needs no worker thread,
* no separate hook module, and no `data` marshalling.
*
* ## Safety
* Best-effort and idempotent. The hook is installed lazily, only on the
* local-embedding code path (after parsing), so it is never registered during
* analysis, in the parse workers, or in HTTP embedding mode. Once installed it
* is process-global: its resolve closure runs for every subsequent module
* resolution, but it passes all of them through untouched and only substitutes a
* result for the exact `onnxruntime-common` specifier when that specifier is
* genuinely absent — so it cannot mask an unrelated resolution error, and the
* per-resolution cost is a single string comparison.
*
* `module.registerHooks` is marked `@experimental` and requires Node >= 22.15
* (the gitnexus engines floor is >= 22.0.0). On older runtimes it is absent and
* this is a graceful no-op: embeddings then resolve onnxruntime-common exactly
* as before — fine on hoisted layouts. Any failure during installation is
* swallowed.
*/
// NOTE: `node:module` is imported as a namespace on purpose. `registerHooks`
// only exists on Node >= 22.15; a *named* ESM import of a missing builtin export
// is rejected at link time ("does not provide an export named ..."), which
// would make this module — and every module that statically imports it —
// fail to load on Node 22.0–22.14 before the runtime guard below could run.
// Reading the property off the namespace at call time yields `undefined`
// instead, so the graceful-degradation path is actually reachable.
import * as nodeModule from 'node:module';
import { pathToFileURL } from 'node:url';
import { logger } from '../logger.js';

/** One-shot guard: set on the first install attempt, successful or not. */
let attempted = false;

/**
 * Compute the file: URL the hook redirects `onnxruntime-common` to.
 *
 * Prefer the copy `onnxruntime-node` (the native binding transformers loads)
 * depends on, so the redirected module is version-matched to the binding even
 * under `pnpm dlx`, where transformers keeps its own pinned onnxruntime-node.
 * The walk resolves transformers' MAIN entry — NOT `@huggingface/transformers/
 * package.json`, which transformers' `exports` map blocks
 * (`ERR_PACKAGE_PATH_NOT_EXPORTED`) — then onnxruntime-node, then its
 * onnxruntime-common. Falls back to gitnexus' own direct dependency (always
 * resolvable from our scope) when any step fails.
 *
 * @returns The `file:` URL (as a string) of the resolved onnxruntime-common entry.
 * @throws Whatever `require.resolve` throws when even the fallback lookup
 *   fails; the caller below absorbs that in its own try/catch.
 */
const resolveOnnxRuntimeCommonUrl = (): string => {
  const require = nodeModule.createRequire(import.meta.url);
  try {
    const transformersMain = require.resolve('@huggingface/transformers');
    const ortNodePkg = nodeModule
      .createRequire(transformersMain)
      .resolve('onnxruntime-node/package.json');
    const common = nodeModule.createRequire(ortNodePkg).resolve('onnxruntime-common');
    return pathToFileURL(common).href;
  } catch {
    // Any broken link in the chain: use the copy visible from gitnexus' scope.
    return pathToFileURL(require.resolve('onnxruntime-common')).href;
  }
};

/**
 * Idempotently install the onnxruntime-common resolution fallback. Call once
 * immediately before the dynamic `import('@huggingface/transformers')` on the
 * local-embedding path.
 *
 * Never throws: on runtimes without `module.registerHooks`, or when resolving
 * the redirect target / registering the hook fails, it returns without
 * installing anything (failures are logged at debug level).
 */
export const ensureOnnxRuntimeCommonResolvable = (): void => {
  if (attempted) return;
  // Mark attempted up-front: a failed attempt must not retry on every
  // initEmbedder() call, and the hook is process-global — once is enough.
  attempted = true;

  try {
    // Node < 22.15 (the gitnexus engines floor is >= 22.0.0): no synchronous
    // hooks API. Looked up on the namespace at call time so that its absence
    // is a plain `undefined` here rather than a module link error.
    const registerHooks: typeof nodeModule.registerHooks | undefined = nodeModule.registerHooks;
    // Degrade gracefully — the import still works on hoisted layouts.
    if (typeof registerHooks !== 'function') return;

    const redirectUrl = resolveOnnxRuntimeCommonUrl();

    registerHooks({
      resolve(specifier, context, nextResolve) {
        if (specifier !== 'onnxruntime-common') return nextResolve(specifier, context);
        // Honour a real, package-manager-provided copy when one is on the path
        // (npm / hoisted pnpm); only substitute ours when the specifier is
        // genuinely absent.
        try {
          return nextResolve(specifier, context);
        } catch (err) {
          // The phantom import surfaces as ERR_MODULE_NOT_FOUND (or, for a
          // present-but-exports-broken copy, ERR_PACKAGE_PATH_NOT_EXPORTED).
          // Rethrow anything else so a genuinely broken install is not masked.
          const code = (err as { code?: string } | null | undefined)?.code;
          if (code === 'ERR_MODULE_NOT_FOUND' || code === 'ERR_PACKAGE_PATH_NOT_EXPORTED') {
            return { url: redirectUrl, shortCircuit: true };
          }
          throw err;
        }
      },
    });
    logger.debug({ redirectUrl }, 'Installed onnxruntime-common resolution fallback (#307)');
  } catch (err) {
    // Never block embeddings on the fallback. On layouts where the package
    // manager already resolves onnxruntime-common this is unnecessary anyway.
    logger.debug(
      { err: err instanceof Error ? err.message : String(err) },
      'onnxruntime-common resolution fallback not installed',
    );
  }
};
4 changes: 4 additions & 0 deletions gitnexus/src/mcp/core/embedder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import {
withHfDownloadRetry,
} from '../../core/embeddings/hf-env.js';
import { getLocalEmbeddingRuntimeBlocker } from '../../core/embeddings/runtime-support.js';
import { ensureOnnxRuntimeCommonResolvable } from '../../core/embeddings/onnxruntime-common-resolver.js';
import { silenceStdout, restoreStdout, realStderrWrite } from '../../core/lbug/pool-adapter.js';

import { logger } from '../../core/logger.js';
Expand Down Expand Up @@ -65,6 +66,9 @@ export const initEmbedder = async (): Promise<FeatureExtractionPipeline> => {
try {
// Lazy-load transformers.js only after the runtime guard has passed, so
// unsupported platforms never reach the native ONNX import (#1515).
// Under pnpm-strict / `pnpm dlx`, transformers' phantom `onnxruntime-common`
// import is unresolvable; register the fallback resolver first (#307).
ensureOnnxRuntimeCommonResolvable();
const { pipeline, env } = await import('@huggingface/transformers');

env.allowLocalModels = false;
Expand Down
132 changes: 132 additions & 0 deletions gitnexus/test/unit/onnxruntime-common-resolver.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import { describe, it, expect, vi, afterEach } from 'vitest';

/**
* Tests for the #307 pnpm-strict / `pnpm dlx` fix: a synchronous in-thread ESM
* resolution hook (`module.registerHooks`) that redirects @huggingface/
* transformers' phantom `onnxruntime-common` import to a copy gitnexus can
* resolve (preferably the one `onnxruntime-node` depends on, otherwise
* gitnexus' own direct dependency).
*
* Each test mocks `node:module` with a chosen `registerHooks` (a spy, or
* `undefined` to simulate Node < 22.15) so we can assert one-shot installation,
* graceful degradation, and the redirect/passthrough/rethrow logic of the
* resolve closure — without mutating the real process loader.
*/

const RESOLVER = '../../src/core/embeddings/onnxruntime-common-resolver.js';

/**
 * Import a fresh copy of the resolver module with `node:module` mocked so that
 * its `registerHooks` export is the supplied value (a spy, a throwing stub, or
 * `undefined`). Every other `node:module` export is taken from the original.
 *
 * The module registry is reset first, so the resolver's module-level one-shot
 * guard starts out unset on every call and tests do not share state.
 */
async function loadResolver(registerHooks: unknown) {
  vi.resetModules();
  vi.doMock('node:module', async (importOriginal) => {
    const actual = await importOriginal<typeof import('node:module')>();
    return Object.assign({}, actual, { registerHooks });
  });
  const resolverModule = await import(RESOLVER);
  return resolverModule;
}

// Stand-in for the resolve-hook context argument; the hook under test only
// forwards it to `nextResolve`, so an empty shape is enough.
const ctx = { conditions: [], importAttributes: {} } as never;

/** Build a fresh error carrying the `ERR_MODULE_NOT_FOUND` code. */
const moduleNotFound = (): Error =>
  Object.assign(new Error("Cannot find package 'onnxruntime-common'"), {
    code: 'ERR_MODULE_NOT_FOUND',
  });

// Drop the `node:module` mock registered by `loadResolver` after every test.
afterEach(function unmockNodeModule() {
  vi.doUnmock('node:module');
});

describe('ensureOnnxRuntimeCommonResolvable — installation', () => {
  it('installs the resolve hook exactly once (idempotent)', async () => {
    const registerHooksSpy = vi.fn();
    const { ensureOnnxRuntimeCommonResolvable } = await loadResolver(registerHooksSpy);

    // The second invocation must be short-circuited by the one-shot guard.
    ensureOnnxRuntimeCommonResolvable();
    ensureOnnxRuntimeCommonResolvable();

    expect(registerHooksSpy).toHaveBeenCalledTimes(1);
    const hooks = registerHooksSpy.mock.calls[0][0];
    expect(typeof hooks.resolve).toBe('function');
  });

  it('no-ops gracefully when registerHooks is unavailable (Node < 22.15)', async () => {
    // `undefined` stands in for a runtime without the synchronous-hooks API.
    const { ensureOnnxRuntimeCommonResolvable } = await loadResolver(undefined);

    const install = () => ensureOnnxRuntimeCommonResolvable();

    expect(install).not.toThrow();
  });

  it('is best-effort: swallows a registerHooks() failure instead of throwing into the embedder', async () => {
    const throwingRegisterHooks = vi.fn(() => {
      throw new Error('hook-install-failed');
    });
    const { ensureOnnxRuntimeCommonResolvable } = await loadResolver(throwingRegisterHooks);

    // initEmbedder calls this unguarded, so a throw would surface in
    // `analyze --embeddings`; the function's own try/catch has to absorb it.
    const install = () => ensureOnnxRuntimeCommonResolvable();

    expect(install).not.toThrow();
  });
});

describe('ensureOnnxRuntimeCommonResolvable — resolve hook behaviour', () => {
  type NextResolve = (s: string, c: never) => unknown;
  type ResolveHook = (s: string, c: never, n: NextResolve) => unknown;

  /** Run the installer against a spy and hand back the `resolve` closure it registered. */
  async function captureResolve(): Promise<ResolveHook> {
    const registerHooksSpy = vi.fn();
    const { ensureOnnxRuntimeCommonResolvable } = await loadResolver(registerHooksSpy);
    ensureOnnxRuntimeCommonResolvable();
    const hooks = registerHooksSpy.mock.calls[0][0];
    return hooks.resolve as ResolveHook;
  }

  /** A `nextResolve` stand-in that always throws the given error. */
  const failingNext = (error: Error) =>
    vi.fn(() => {
      throw error;
    });

  it('passes a successful default resolution through unchanged (no-op on hoisted layouts)', async () => {
    const resolve = await captureResolve();
    const defaultResult = { url: 'file:///real/onnxruntime-common/index.js', shortCircuit: true };
    const next = vi.fn(() => defaultResult);

    const outcome = resolve('onnxruntime-common', ctx, next);

    expect(next).toHaveBeenCalledTimes(1);
    // Same object identity: the default result is returned, not a redirect.
    expect(outcome).toBe(defaultResult);
  });

  it('redirects onnxruntime-common to the gitnexus copy when default resolution fails', async () => {
    const resolve = await captureResolve();
    const next = failingNext(moduleNotFound());

    const outcome = resolve('onnxruntime-common', ctx, next) as {
      url: string;
      shortCircuit: boolean;
    };

    expect(outcome.shortCircuit).toBe(true);
    // `require.resolve` is not mocked, so this is an actual on-disk
    // onnxruntime-common entry under node_modules, not an arbitrary path.
    expect(outcome.url).toMatch(/^file:\/\/.*\/node_modules\/onnxruntime-common\/.*\.js$/);
  });

  it('never masks an unrelated resolution failure (other specifiers rethrow)', async () => {
    const resolve = await captureResolve();
    const failure = moduleNotFound();
    const next = failingNext(failure);

    const attempt = () => resolve('some-other-package', ctx, next);

    expect(attempt).toThrow(failure);
  });

  it('rethrows when onnxruntime-common fails for a non-absence reason', async () => {
    const resolve = await captureResolve();
    // An error code other than the two "absent package" codes must propagate
    // rather than being replaced by the redirect.
    const failure = Object.assign(new Error('bad specifier'), {
      code: 'ERR_INVALID_MODULE_SPECIFIER',
    });
    const next = failingNext(failure);

    const attempt = () => resolve('onnxruntime-common', ctx, next);

    expect(attempt).toThrow(failure);
  });
});
Loading