Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions packages/qvac-lib-infer-nmtcpp/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [2.0.3] - 2026-04-15

### Added

- `lib/indictrans-model-fetcher.js` — downloads IndicTrans2 GGML models from the QVAC registry when not found locally
- `@qvac/registry-client` as a devDependency for IndicTrans model downloads
- Package export for `./lib/indictrans-model-fetcher`

### Changed

- `examples/pivot.example.js` — auto-downloads Bergamot models (es-en, en-it) from Firefox CDN via `ensureBergamotModelFiles()`
- `examples/indictrans.js` — auto-downloads IndicTrans model from QVAC registry via `ensureIndicTransModelFile()`

## [2.0.2] - 2026-04-14

### Fixed
Expand Down
20 changes: 12 additions & 8 deletions packages/qvac-lib-infer-nmtcpp/examples/indictrans.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* This example demonstrates translation using the IndicTrans2 model
* for English to Hindi translation (eng_Latn -> hin_Deva).
*
* Requires a local IndicTrans model file.
* The model file is downloaded automatically from the QVAC registry if not found locally.
*
* Usage:
* bare examples/indictrans.js
Expand All @@ -17,9 +17,14 @@
*/

const TranslationNmtcpp = require('../index')
const fs = require('bare-fs')
const path = require('bare-path')
const process = require('bare-process')

const {
ensureIndicTransModelFile,
getIndicTransFileName
} = require('../lib/indictrans-model-fetcher')

// ============================================================
// LOGGING CONFIGURATION
// Set VERBOSE=1 environment variable to enable C++ debug logs
Expand All @@ -38,13 +43,12 @@ const logger = VERBOSE
const text = 'How are you'

async function main () {
const modelPath = process.env.INDICTRANS_MODEL_PATH || './models/ggml-indictrans2-en-indic-dist-200M.bin'
// Use local model path if provided, otherwise auto-download from QVAC registry
const defaultModelPath = path.join('./model/indictrans', getIndicTransFileName())
const modelPath = process.env.INDICTRANS_MODEL_PATH || defaultModelPath

if (!fs.existsSync(modelPath)) {
console.log('IndicTrans model not found at:', modelPath)
console.log('Set INDICTRANS_MODEL_PATH env var or place model at ./models/ggml-indictrans2-en-indic-dist-200M.bin')
return
}
// Ensure model file is present (downloads from QVAC registry if not)
await ensureIndicTransModelFile(modelPath)

const model = new TranslationNmtcpp({
files: { model: modelPath },
Expand Down
41 changes: 20 additions & 21 deletions packages/qvac-lib-infer-nmtcpp/examples/pivot.example.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
* - Second model: English -> Italian (en-it)
* - Result: Spanish -> Italian translation via English pivot
*
* Requires local Bergamot model files for both language pairs.
* Model files are downloaded automatically from the Firefox CDN if not found locally.
*
* Usage:
* bare examples/pivot.example.js
Expand All @@ -19,10 +19,14 @@
*/

const TranslationNmtcpp = require('../index')
const fs = require('bare-fs')
const path = require('bare-path')
const process = require('bare-process')

const {
ensureBergamotModelFiles,
getBergamotFileNames
} = require('../lib/bergamot-model-fetcher')

// ============================================================
// LOGGING CONFIGURATION
// Set VERBOSE=1 environment variable to enable C++ debug logs
Expand Down Expand Up @@ -54,30 +58,25 @@ async function main () {
console.log(spanishText)
console.log('-----------------------------------------------------------\n')

const esenPath = process.env.BERGAMOT_ESEN_PATH || './models/es-en'
const enitPath = process.env.BERGAMOT_ENIT_PATH || './models/en-it'
// Use local model paths if provided, otherwise auto-download from Firefox CDN
const esenPath = process.env.BERGAMOT_ESEN_PATH || './model/bergamot/esen'
const enitPath = process.env.BERGAMOT_ENIT_PATH || './model/bergamot/enit'

const primaryModel = path.join(esenPath, 'model.esen.intgemm.alphas.bin')
const primaryVocab = path.join(esenPath, 'vocab.esen.spm')
const pivotModel = path.join(enitPath, 'model.enit.intgemm.alphas.bin')
const pivotVocab = path.join(enitPath, 'vocab.enit.spm')
// Ensure model files are present (downloads from Firefox CDN if not)
const esenDir = await ensureBergamotModelFiles('es', 'en', esenPath)
const enitDir = await ensureBergamotModelFiles('en', 'it', enitPath)

for (const f of [primaryModel, primaryVocab, pivotModel, pivotVocab]) {
if (!fs.existsSync(f)) {
console.log('Missing model file:', f)
console.log('\nSet BERGAMOT_ESEN_PATH and BERGAMOT_ENIT_PATH env vars or place models in ./models/es-en and ./models/en-it')
return
}
}
const esenFiles = getBergamotFileNames('es', 'en')
const enitFiles = getBergamotFileNames('en', 'it')

const model = new TranslationNmtcpp({
files: {
model: primaryModel,
srcVocab: primaryVocab,
dstVocab: primaryVocab,
pivotModel,
pivotSrcVocab: pivotVocab,
pivotDstVocab: pivotVocab
model: path.join(esenDir, esenFiles.modelName),
srcVocab: path.join(esenDir, esenFiles.srcVocabName),
dstVocab: path.join(esenDir, esenFiles.dstVocabName),
pivotModel: path.join(enitDir, enitFiles.modelName),
pivotSrcVocab: path.join(enitDir, enitFiles.srcVocabName),
pivotDstVocab: path.join(enitDir, enitFiles.dstVocabName)
},
params: {
srcLang: 'es',
Expand Down
136 changes: 136 additions & 0 deletions packages/qvac-lib-infer-nmtcpp/lib/indictrans-model-fetcher.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
'use strict'

/**
* IndicTrans Model Fetcher
*
* Downloads IndicTrans2 GGML model files from the QVAC model registry.
*
* This module does NOT touch Bergamot or OPUS models.
*/

const fs = require('bare-fs')
const path = require('bare-path')

// ============================================================================
// Model registry paths (from SDK models.ts)
// ============================================================================

/**
 * Catalogue of downloadable IndicTrans2 model variants, keyed by variant name.
 *
 * Fields of each entry, as they are used elsewhere in this module:
 * - registryPath: first argument passed to `client.downloadModel()`
 * - registrySource: second argument passed to `client.downloadModel()`
 * - filename: value returned by `getIndicTransFileName()`; also shown in the
 * "Downloading ..." log line
 * - expectedMinSizeMB: size floor, in units of 1024 * 1024 bytes, that a file
 * on disk must reach; smaller files trigger a download in
 * `ensureIndicTransModelFile()` and are reported as corrupted after a download
 */
const INDICTRANS_MODELS = {
'en-indic-200M-q4_0': {
registryPath: 'qvac_models_compiled/ggml/indictrans2/q4_0/ggml-indictrans2-en-indic-dist-200M/2026-01-01/ggml-indictrans2-en-indic-dist-200M-q4_0.bin',
registrySource: 's3',
filename: 'ggml-indictrans2-en-indic-dist-200M-q4_0.bin',
expectedMinSizeMB: 100
}
}

// ============================================================================
// Helpers
// ============================================================================

/**
 * Reports whether `filePath` can be stat'ed and is at least `minSizeMB` large.
 *
 * @param {string} filePath Path of the file to inspect
 * @param {number} minSizeMB Minimum acceptable size, in units of 1024 * 1024 bytes
 * @returns {boolean} `true` when the file is large enough; `false` when it is
 *   smaller or when anything in the check throws (e.g. the path does not exist)
 */
function hasValidModelFile (filePath, minSizeMB) {
  let isLargeEnough = false
  try {
    const { size } = fs.statSync(filePath)
    const requiredBytes = minSizeMB * 1024 * 1024
    isLargeEnough = size >= requiredBytes
  } catch {
    // Any failure counts as "no usable file"; isLargeEnough stays false.
  }
  return isLargeEnough
}

// ============================================================================
// Download via QVAC Registry
// ============================================================================

/**
* Downloads an IndicTrans model file from the QVAC model registry.
*/
async function downloadIndicTransFromRegistry (modelKey, destPath) {
const { QVACRegistryClient } = require('@qvac/registry-client')

const modelInfo = INDICTRANS_MODELS[modelKey]
if (!modelInfo) {
throw new Error(`Unknown IndicTrans model key: ${modelKey}. Available: ${Object.keys(INDICTRANS_MODELS).join(', ')}`)
}

console.log(`[indictrans-fetcher] Downloading ${modelInfo.filename} from QVAC registry...`)

const client = new QVACRegistryClient()
await client.ready()

try {
const destDir = path.dirname(destPath)
fs.mkdirSync(destDir, { recursive: true })

const result = await client.downloadModel(
modelInfo.registryPath,
modelInfo.registrySource,
{ outputFile: destPath }
)

console.log(`[indictrans-fetcher] Download complete → ${result.artifact.path}`)

if (!hasValidModelFile(destPath, modelInfo.expectedMinSizeMB)) {
throw new Error(`Downloaded file seems corrupted (expected >${modelInfo.expectedMinSizeMB}MB)`)
}

return destPath
} finally {
await client.close()
}
}

// ============================================================================
// Public API
// ============================================================================

/**
 * Ensures an IndicTrans model file is present at destPath.
 *
 * 1. If a file of at least the variant's expected minimum size already exists
 *    at destPath → logs and returns immediately
 * 2. Otherwise downloads it from the QVAC model registry
 *
 * @param {string} destPath Full path where the model file should be stored
 * @param {string} [modelKey='en-indic-200M-q4_0'] Model variant key
 * @returns {Promise<string>} Resolved path to the model file
 * @throws {Error} If `modelKey` is not a key of `INDICTRANS_MODELS`; download
 *   errors propagate unchanged
 */
async function ensureIndicTransModelFile (destPath, modelKey = 'en-indic-200M-q4_0') {
  // Own-property lookup: a bare `INDICTRANS_MODELS[modelKey]` would also accept
  // inherited names such as 'constructor', whose `expectedMinSizeMB` is
  // undefined and would send the call straight into a download attempt.
  const modelInfo = Object.prototype.hasOwnProperty.call(INDICTRANS_MODELS, modelKey)
    ? INDICTRANS_MODELS[modelKey]
    : undefined
  if (!modelInfo) {
    throw new Error(`Unknown IndicTrans model key: ${modelKey}. Available: ${Object.keys(INDICTRANS_MODELS).join(', ')}`)
  }

  if (hasValidModelFile(destPath, modelInfo.expectedMinSizeMB)) {
    console.log(`[indictrans-fetcher] Model already available at ${destPath}`)
    return destPath
  }

  return downloadIndicTransFromRegistry(modelKey, destPath)
}

/**
 * Returns the default filename for an IndicTrans model variant.
 *
 * @param {string} [modelKey='en-indic-200M-q4_0'] Model variant key
 * @returns {string} Filename
 * @throws {Error} If `modelKey` is not a key of `INDICTRANS_MODELS`
 */
function getIndicTransFileName (modelKey = 'en-indic-200M-q4_0') {
  // Own-property lookup: a bare `INDICTRANS_MODELS[modelKey]` would also accept
  // inherited names such as 'constructor' and then return `undefined` instead
  // of a filename.
  const modelInfo = Object.prototype.hasOwnProperty.call(INDICTRANS_MODELS, modelKey)
    ? INDICTRANS_MODELS[modelKey]
    : undefined
  if (!modelInfo) {
    throw new Error(`Unknown IndicTrans model key: ${modelKey}. Available: ${Object.keys(INDICTRANS_MODELS).join(', ')}`)
  }
  return modelInfo.filename
}

// ============================================================================
// Exports
// ============================================================================

// Names made available to callers of this module: the two high-level helpers,
// the lower-level download function, and the variant table itself.
// `hasValidModelFile` is not exported.
module.exports = {
ensureIndicTransModelFile,
getIndicTransFileName,
downloadIndicTransFromRegistry,
INDICTRANS_MODELS
}
6 changes: 4 additions & 2 deletions packages/qvac-lib-infer-nmtcpp/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@qvac/translation-nmtcpp",
"version": "2.0.2",
"version": "2.0.3",
"description": "translation addon for qvac",
"addon": true,
"engines": {
Expand Down Expand Up @@ -66,6 +66,7 @@
"homepage": "https://github.com/tetherto/qvac/tree/main/packages/qvac-lib-infer-nmtcpp#readme",
"devDependencies": {
"@qvac/dl-base": "^0.1.0",
"@qvac/registry-client": "^0.4.0",
"bare-fetch": "^2.5.1",
"bare-fs": "^4.5.1",
"bare-process": "^4.2.2",
Expand All @@ -91,7 +92,8 @@
"default": "./addonLogging.js"
},
"./addonLogging.js": "./addonLogging.js",
"./lib/bergamot-model-fetcher": "./lib/bergamot-model-fetcher.js"
"./lib/bergamot-model-fetcher": "./lib/bergamot-model-fetcher.js",
"./lib/indictrans-model-fetcher": "./lib/indictrans-model-fetcher.js"
},
"types": "index.d.ts",
"standard": {
Expand Down
Loading