diff --git a/packages/sdk/bun.lock b/packages/sdk/bun.lock index cf305299b6..b8e52514b6 100644 --- a/packages/sdk/bun.lock +++ b/packages/sdk/bun.lock @@ -16,7 +16,7 @@ "@qvac/registry-client": "^0.2.0", "@qvac/transcription-parakeet": "^0.1.9", "@qvac/transcription-whispercpp": "^0.5.0", - "@qvac/translation-nmtcpp": "^0.3.9", + "@qvac/translation-nmtcpp": "^0.6.1", "@qvac/tts-onnx": "^0.6.1", "fast-safe-stringify": "2.1.1", "which-runtime": "^1.3.2", @@ -537,7 +537,7 @@ "@qvac/transcription-whispercpp": ["@qvac/transcription-whispercpp@0.5.0", "", { "dependencies": { "@qvac/decoder-audio": "^0.3.3", "@qvac/error": "^0.1.0", "@qvac/infer-base": "^0.2.0", "@qvac/logging": "^0.1.0", "bare-channel": "^5.2.2", "bare-ffmpeg": "^1.0.0-32", "bare-node-worker-threads": "^1.0.0", "bare-path": "^3.0.0", "bare-stream": "^2.7.0", "bare-worker": "^4.1.0", "path": "npm:bare-path", "process": "npm:bare-process@^4.2.2", "stream": "npm:bare-node-stream", "worker_threads": "npm:bare-node-worker-threads@^1.0.0" } }, "sha512-CxKeMlWj1Nhca+rK4gVtSPfZyiXkqegRjHua1Y/lsRNfERSQn3xMwnBzDj9yHn9CGFXV244KKE8/A1ZLXn6fBw=="], - "@qvac/translation-nmtcpp": ["@qvac/translation-nmtcpp@0.3.9", "", { "dependencies": { "@qvac/dl-hyperdrive": "^0.1.0", "@qvac/error": "^0.1.0", "@qvac/infer-base": "^0.2.0", "bare-path": "^3.0.0" } }, "sha512-hN3kaAdQ944OFI/UyupZFmTECdNfZKPMJDWy2joyx2UUi6i2Qv8G6hPPRj7JvCmrR5q9oJlOYN83+k4BQKFGpA=="], + "@qvac/translation-nmtcpp": ["@qvac/translation-nmtcpp@0.6.1", "", { "dependencies": { "@qvac/dl-hyperdrive": "^0.1.0", "@qvac/error": "^0.1.0", "@qvac/infer-base": "^0.2.0", "bare-path": "^3.0.0" } }, "sha512-TQ2vKYlcn7rSUJMns9J7Tin0+26ribfPw1PAYfPzRYSauCltNv/a5angjA55wE3DxxDG0+1i3afyCjoRcW+Dbw=="], "@qvac/tts-onnx": ["@qvac/tts-onnx@0.6.1", "", { "dependencies": { "@qvac/error": "^0.1.0", "@qvac/infer-base": "^0.1.0", "bare-fs": "^4.5.1", "bare-path": "^3.0.0" } }, "sha512-sIocUdkDqzwZs6qWofdIZCqvNII48NFViw4qii8wKiju439hI4LY+CYfaSnQPHwDurBdQXXYjK8lG8Dz2GBQRA=="], diff --git a/packages/sdk/examples/translation/translation-bergamot-pivot.ts b/packages/sdk/examples/translation/translation-bergamot-pivot.ts new file mode 100644 index 0000000000..b712832c61 --- /dev/null +++ b/packages/sdk/examples/translation/translation-bergamot-pivot.ts @@ -0,0 +1,83 @@ +import {BERGAMOT_ES_EN, BERGAMOT_EN_IT, loadModel, translate, unloadModel} from "@qvac/sdk"; + +/** + * Example: Pivot Translation with Bergamot + * + * Demonstrates translating Spanish to Italian through English as a pivot language. + * This requires two models: + * 1. Spanish → English (primary model) + * 2. English → Italian (pivot model) + * + * The API structure follows the standard Bergamot model pattern: + * - modelSrc: Primary translation model + * - modelConfig: Configuration with Bergamot-specific settings + * - modelConfig.pivotModel: Configuration for the secondary model + */ + +// Spanish to Italian via English pivot example +try { + // Load the primary model (Spanish → English) with pivot configuration + const modelId = await loadModel({ + modelSrc: BERGAMOT_ES_EN, // Primary model: Spanish → English + modelType: "nmt", + modelConfig: { + engine: "Bergamot", + from: "es", + to: "it", // Final target language (SDK handles the pivot internally) + beamsize: 4, + normalize: 1, + temperature: 0.3, + topk: 100, + // Pivot model configuration (English → Italian) + pivotModel: { + modelSrc: BERGAMOT_EN_IT, // Source for English → Italian model + // Bergamot-specific generation parameters for pivot model + beamsize: 4, + temperature: 0.3, + topk: 100, + normalize: 1, + lengthpenalty: 1.2, + } + }, + onProgress: (progress) => { + console.log(progress); + }, + }); + + console.log(`✅ Pivot translation model loaded: ${modelId}`); + console.log(" Primary: Spanish → English"); + console.log(" Pivot: English → Italian"); + + // Spanish text to translate + const spanishText = `Era una mañana soleada cuando María decidió visitar el mercado local. + Compró frutas frescas, verduras y flores para su casa. + El vendedor le recomendó las mejores manzanas de la temporada.`; + + console.log("\n📝 Original Spanish text:"); + console.log(spanishText); + + // Translate Spanish → English → Italian + const result = translate({ + modelId, + text: spanishText, + modelType: "nmt", + stream: false, + }); + + const italianText = await result.text; + + console.log("\n🇮🇹 Translated to Italian (via English):"); + console.log(italianText); + + // Expected output (approximate): + // "Era una mattina di sole quando Maria decise di visitare il mercato locale. + // Ha comprato frutta fresca, verdura e fiori per la sua casa. + // Il venditore ha consigliato le migliori mele della stagione." + + await unloadModel({ modelId }); + console.log("\n✅ Model unloaded successfully"); + +} catch (error) { + console.error("❌ Error:", error); + process.exit(1); +} diff --git a/packages/sdk/package.json b/packages/sdk/package.json index 3eff5ff1f0..d9ea8d75aa 100644 --- a/packages/sdk/package.json +++ b/packages/sdk/package.json @@ -122,7 +122,7 @@ "@qvac/registry-client": "^0.2.0", "@qvac/transcription-parakeet": "^0.1.9", "@qvac/transcription-whispercpp": "^0.5.0", - "@qvac/translation-nmtcpp": "^0.3.9", + "@qvac/translation-nmtcpp": "^0.6.1", "@qvac/tts-onnx": "^0.6.1", "fast-safe-stringify": "2.1.1", "which-runtime": "^1.3.2", diff --git a/packages/sdk/schemas/load-model.ts b/packages/sdk/schemas/load-model.ts index 9e6a89919b..1ddac0d273 100644 --- a/packages/sdk/schemas/load-model.ts +++ b/packages/sdk/schemas/load-model.ts @@ -203,7 +203,13 @@ const loadModelOptionsToRequestBaseSchema = z.union([ modelType: ModelType.nmtcppTranslation, modelSrc: modelInputToSrcSchema.parse(data.modelSrc), modelName: modelInputToNameSchema.parse(data.modelSrc), - modelConfig: data.modelConfig, + modelConfig: (data.modelConfig.engine === "Bergamot" && data.modelConfig.pivotModel) ? { + ...data.modelConfig, + pivotModel: { + ...data.modelConfig.pivotModel, + modelSrc: modelInputToSrcSchema.parse(data.modelConfig.pivotModel.modelSrc), + }, + } : data.modelConfig, seed: data.seed ?? false, withProgress: data.withProgress ?? !!data.onProgress, delegate: data.delegate, diff --git a/packages/sdk/schemas/translation-config.ts b/packages/sdk/schemas/translation-config.ts index 24eae17b8c..ec5df3760c 100644 --- a/packages/sdk/schemas/translation-config.ts +++ b/packages/sdk/schemas/translation-config.ts @@ -93,6 +93,14 @@ const opusConfigSchema = nmtGenerationParamsSchema.extend({ to: z.enum(MARIAN_LANGUAGES), }); +// Pivot model configuration for Bergamot (for translation via intermediate language) +const bergamotPivotModelSchema = nmtGenerationParamsSchema.extend({ + modelSrc: modelSrcInputSchema, + srcVocabSrc: modelSrcInputSchema.optional(), + dstVocabSrc: modelSrcInputSchema.optional(), + normalize: z.number().optional(), +}).optional(); + // Bergamot engine config - supports BERGAMOT_LANGUAGES const bergamotConfigSchema = nmtGenerationParamsSchema.extend({ engine: z.literal("Bergamot"), @@ -101,6 +109,7 @@ const bergamotConfigSchema = nmtGenerationParamsSchema.extend({ srcVocabSrc: modelSrcInputSchema.optional(), dstVocabSrc: modelSrcInputSchema.optional(), normalize: z.number().optional(), + pivotModel: bergamotPivotModelSchema }); // IndicTrans engine config - supports INDICTRANS_LANGUAGES diff --git a/packages/sdk/server/bare/plugins/nmtcpp-translation/plugin.ts b/packages/sdk/server/bare/plugins/nmtcpp-translation/plugin.ts index b0b066d5f0..93dc75cae3 100644 --- a/packages/sdk/server/bare/plugins/nmtcpp-translation/plugin.ts +++ b/packages/sdk/server/bare/plugins/nmtcpp-translation/plugin.ts @@ -70,6 +70,9 @@ function createNmtModel( nmtConfig: NmtConfig, srcVocabPath?: string, dstVocabPath?: string, + pivotModelPath?: string, + pivotSrcVocabPath?: string, + pivotDstVocabPath?: string, ) { const { dirPath, basePath } = parseModelPath(modelPath); const loader = new FilesystemDL({ dirPath }); @@ -122,6 +125,24 @@ function createNmtModel( ...(nmtConfig.normalize !== undefined && { normalize: nmtConfig.normalize, }), + // Add pivot model configuration if present + ...(nmtConfig.pivotModel && { + bergamotPivotModel: (() => { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + const {modelSrc, dstVocabSrc, srcVocabSrc, ...config} = nmtConfig.pivotModel + const { dirPath, basePath } = parseModelPath(pivotModelPath!); + return { + loader: asLoader(new FilesystemDL({ dirPath })), + modelName: basePath, + diskPath: dirPath, + config: { + ...config, + srcVocabPath: pivotSrcVocabPath, + dstVocabPath: pivotDstVocabPath + } + }; + })(), + }), }), }; @@ -149,8 +170,10 @@ export const nmtPlugin = definePlugin({ const { srcVocabSrc, dstVocabSrc, ...nmtConfig } = cfg as { srcVocabSrc?: ModelSrcInput; dstVocabSrc?: ModelSrcInput; + pivotModel?: { srcVocabSrc?: ModelSrcInput, dstVocabSrc?: ModelSrcInput, modelSrc: string }; } & NmtConfig; + if (nmtConfig.engine !== "Bergamot") { return { config: nmtConfig }; } @@ -176,14 +199,51 @@ export const nmtPlugin = definePlugin({ ); } - const [srcVocabPath, dstVocabPath] = await Promise.all([ + const pivotModel = nmtConfig.pivotModel + if (!pivotModel) { + const [srcVocabPath, dstVocabPath] = await Promise.all([ + ctx.resolveModelPath(srcSrc), + ctx.resolveModelPath(dstSrc), + ]); + + return { + config: nmtConfig, + artifacts: { srcVocabPath, dstVocabPath }, + }; + } + + let pivotSrcSrc: ModelSrcInput | undefined = pivotModel.srcVocabSrc; + let pivotDstSrc: ModelSrcInput | undefined = pivotModel.dstVocabSrc; + + if (!pivotSrcSrc || !pivotDstSrc) { + const pivotDerived = pivotModel.modelSrc.startsWith("pear://") + ? deriveBergamotVocabSources(pivotModel.modelSrc) + : pivotModel.modelSrc.startsWith("registry://") + ? deriveBergamotRegistryVocabSources(pivotModel.modelSrc) + : null; + if (pivotDerived) { + pivotSrcSrc = pivotSrcSrc ?? pivotDerived.srcVocabSrc; + pivotDstSrc = pivotDstSrc ?? pivotDerived.dstVocabSrc; + } + } + + if (!pivotSrcSrc || !pivotDstSrc) { + throw new ModelLoadFailedError( + "Bergamot pivot model requires srcVocabSrc and dstVocabSrc. Provide them in modelConfig or use a pear:// or registry:// model source for auto-derivation.", + ); + } + + const [srcVocabPath, dstVocabPath, pivotSrcVocabPath, pivotDstVocabPath, pivotModelPath] = await Promise.all([ ctx.resolveModelPath(srcSrc), ctx.resolveModelPath(dstSrc), + ctx.resolveModelPath(pivotSrcSrc), + ctx.resolveModelPath(pivotDstSrc), + ctx.resolveModelPath(pivotModel.modelSrc), ]); return { config: nmtConfig, - artifacts: { srcVocabPath, dstVocabPath }, + artifacts: { srcVocabPath, dstVocabPath, pivotSrcVocabPath, pivotDstVocabPath, pivotModelPath }, }; }, @@ -196,6 +256,9 @@ export const nmtPlugin = definePlugin({ nmtConfig, params.artifacts?.["srcVocabPath"], params.artifacts?.["dstVocabPath"], + params.artifacts?.["pivotModelPath"], + params.artifacts?.["pivotSrcVocabPath"], + params.artifacts?.["pivotDstVocabPath"], ); return { model, loader };