diff --git a/packages/qvac-lib-infer-nmtcpp/CHANGELOG.md b/packages/qvac-lib-infer-nmtcpp/CHANGELOG.md index 926842333b..29271e089e 100644 --- a/packages/qvac-lib-infer-nmtcpp/CHANGELOG.md +++ b/packages/qvac-lib-infer-nmtcpp/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [2.1.1] - 2026-05-04 + +### Added + +- Exported `RuntimeStats` interface in `index.d.ts` with fields: `totalTokens`, `totalTime`, `decodeTime`, `TPS` (required), `encodeTime`, `TTFT` (optional — GGML backend only). Matches C++ backend output for SDK type-safety. + ## [2.1.0] - 2026-04-22 ### Added diff --git a/packages/qvac-lib-infer-nmtcpp/README.md b/packages/qvac-lib-infer-nmtcpp/README.md index c4b439f53d..0d6367be19 100644 --- a/packages/qvac-lib-infer-nmtcpp/README.md +++ b/packages/qvac-lib-infer-nmtcpp/README.md @@ -368,7 +368,7 @@ try { // Access performance statistics (if enabled with opts.stats) if (response.stats) { - console.log('Translation completed in:', response.stats.totalTime, 'ms') + console.log('Translation completed in:', response.stats.totalTime, 's') } } catch (error) { console.error('Translation failed:', error) diff --git a/packages/qvac-lib-infer-nmtcpp/index.d.ts b/packages/qvac-lib-infer-nmtcpp/index.d.ts index ba28e02daf..41e996a67b 100644 --- a/packages/qvac-lib-infer-nmtcpp/index.d.ts +++ b/packages/qvac-lib-infer-nmtcpp/index.d.ts @@ -93,6 +93,31 @@ export interface InferenceClientState { destroyed: boolean } +/** + * Stats returned via `response.stats` when the addon is constructed with + * `opts.stats = true`. Field set differs by backend: + * + * - Bergamot emits: `totalTokens`, `totalTime`, `decodeTime`, `TPS`. + * - GGML/IndicTrans emits the above plus `encodeTime` and `TTFT`, so both are optional. 
+ * + * Units: + * - `totalTime`, `encodeTime`, `decodeTime` — seconds (double). + * - `TTFT` (Time-To-First-Token) — milliseconds (double). + * - `TPS` (Tokens-Per-Second) — tokens / second (double). + * - `totalTokens` — integer count. + * + * Note: pivot translations may emit keys prefixed with the model name + * (e.g. `"BERGAMOT : ->TPS"`). This interface models the non-pivot shape. + */ +export interface RuntimeStats { + totalTokens: number + totalTime: number + decodeTime: number + TPS: number + encodeTime?: number + TTFT?: number +} + export default class TranslationNmtcpp { static readonly ModelTypes: TranslationNmtcppModelTypes constructor(args: TranslationNmtcppArgs) diff --git a/packages/qvac-lib-infer-nmtcpp/package.json b/packages/qvac-lib-infer-nmtcpp/package.json index e81914c57d..6415c84248 100644 --- a/packages/qvac-lib-infer-nmtcpp/package.json +++ b/packages/qvac-lib-infer-nmtcpp/package.json @@ -1,6 +1,6 @@ { "name": "@qvac/translation-nmtcpp", - "version": "2.1.0", + "version": "2.1.1", "description": "translation addon for qvac", "addon": true, "engines": {