Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions packages/qvac-lib-infer-whispercpp/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,17 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.6.2]

### Changed
- Fixed a chunking issue, re-introduced in 0.6.0, in which the inference output was not streamed but instead returned as a single batched result at the end.

## [0.6.1]

### Changed

- Changed `@qvac/transcription-whispercpp` package visibility on NPM from private to public

## [0.6.0]

This release is a significant interface modernisation. The constructor switches to a local-files map, model download is removed from the load path, concurrent inference runs are serialised instead of rejected, and the class no longer extends `BaseInference`.
Expand Down Expand Up @@ -52,6 +63,11 @@ When `exclusiveRun` is enabled (the default), a second call to `run()` or `runSt

`TranscriptionWhispercppFiles` and `InferenceClientState` are now exported from the `TranscriptionWhispercpp` namespace. Lifecycle methods (`load`, `unload`, `destroy`, `cancel`, `pause`, `unpause`, `stop`, `status`, `getState`) are now explicitly declared in `index.d.ts`.

## [0.5.6]

### Changed
- Fixed a chunking issue, introduced in 0.5.0, in which the inference output was not streamed but instead returned as a single batched result at the end.

## [0.5.5]

### Changed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,14 @@ inline js_value_t* runJob(js_env_t* env, js_callback_info_t* info) try {
vector<uint8_t> audioBytes =
js::TypedArray<uint8_t>(env, jsInput).as<std::vector<uint8_t>>(env);
auto samples = WhisperModel::preprocessAudioData(audioBytes, audioFormat);
return instance.runJob(std::any(std::move(samples)));

WhisperModel::AnyInput anyInput;
anyInput.input = std::move(samples);
anyInput.outputCallback = [&instance](const Transcript& transcript) {
instance.addonCpp->outputQueue->queueResult(std::any(transcript));
};

return instance.runJob(std::any(std::move(anyInput)));
}
JSCATCH

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ std::any WhisperModel::process(const std::any& input) {

if (shouldOverrideCallback) {
on_segment_ = previousOutputCallback;
return Output{};
}

return output_;
Expand Down
2 changes: 1 addition & 1 deletion packages/qvac-lib-infer-whispercpp/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@qvac/transcription-whispercpp",
"version": "0.6.0",
"version": "0.6.2",
"description": "transcription addon for qvac",
"addon": true,
"engines": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,21 @@ async function transcribeChunk (model, audioStream, offsetMs, durationMs, audioC
}
})

// audioStream is provided by caller to avoid reading the whole file inside this function

const response = await model.run(audioStream)

const results = []
let updateCallCount = 0
let maxBatchSize = 0
response.onUpdate((outputArr) => {
updateCallCount++
const items = Array.isArray(outputArr) ? outputArr : [outputArr]
if (items.length > maxBatchSize) maxBatchSize = items.length
results.push(...items)
})

await response.await()

return results
return { results, updateCallCount, maxBatchSize }
}

const { modelPath } = getTestPaths()
Expand Down Expand Up @@ -99,6 +101,7 @@ test('Audio context chunking - 10 minute audio file with 30s chunks', { skip: is
const allResults = []
let errorCount = 0
let chunksWithSegments = 0
let batchedDeliveryCount = 0

// Process each chunk - always pass full audio, only change offset_ms, duration_ms, audio_ctx
let currentOffsetSeconds = 0
Expand All @@ -110,9 +113,9 @@ test('Audio context chunking - 10 minute audio file with 30s chunks', { skip: is

const fullAudioStream = createAudioStream(fullAudioBuffer)

let results = []
let chunk = { results: [], updateCallCount: 0, maxBatchSize: 0 }
try {
results = await transcribeChunk(
chunk = await transcribeChunk(
model,
fullAudioStream,
currentOffsetSeconds * 1000,
Expand All @@ -126,11 +129,16 @@ test('Audio context chunking - 10 minute audio file with 30s chunks', { skip: is

currentOffsetSeconds += chunkDuration

if (results.length > 0) {
const text = results.map(s => s.text).join(' ').replace(/\s+/g, ' ').trim()
if (chunk.results.length > 0) {
const text = chunk.results.map(s => s.text).join(' ').replace(/\s+/g, ' ').trim()
console.log(` → segments=${chunk.results.length} updates=${chunk.updateCallCount} maxBatch=${chunk.maxBatchSize}`)
console.log(` → ${text}\n`)
allResults.push(...results)
allResults.push(...chunk.results)
chunksWithSegments++

if (chunk.results.length > 1 && chunk.updateCallCount === 1) {
batchedDeliveryCount++
}
} else {
console.log(' → [no output]\n')
}
Expand All @@ -141,13 +149,19 @@ test('Audio context chunking - 10 minute audio file with 30s chunks', { skip: is
console.log(`Total chunks processed: ${totalChunks}`)
console.log(`Chunks with segments: ${chunksWithSegments}`)
console.log(`Chunk errors: ${errorCount}`)
console.log(`Batched deliveries (regression): ${batchedDeliveryCount}`)
console.log(`Duration processed: ${totalDurationSeconds.toFixed(1)}s`)

// Assertions
t.ok(allResults.length > 0, 'Should produce transcription segments')
t.is(chunksWithSegments, totalChunks, 'Should transcribe exactly totalChunks chunks')
t.is(errorCount, 0, 'No chunk errors or exceptions')

t.is(
batchedDeliveryCount, 0,
'Segments must be streamed incrementally (not all batched into a single onUpdate call)'
)

// Verify segments have required properties
if (allResults.length > 0) {
const firstSegment = allResults[0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ test('Streaming transcript output preserves segment ordering', async (t) => {

const outputEvents = events.filter(e => e.event === 'Output' && e.jobId === 1)
t.alike(
outputEvents.map(e => e.output.text),
outputEvents.map(e => e.output[0].text),
['segment-0', 'segment-1', 'segment-2'],
'Output segments should keep original ordering'
)
Expand Down
7 changes: 4 additions & 3 deletions packages/qvac-lib-infer-whispercpp/test/unit/vad.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,10 @@ test('VAD mode processes audio with voice activity detection', async (t) => {

if (outputEvents.length > 0) {
t.ok(outputEvents[0].output, 'Should have transcription output')
t.is(typeof outputEvents[0].output, 'object', 'Output should be transcript object')
t.ok(outputEvents[0].output.text.includes('Mock transcription') ||
outputEvents[0].output.text.includes('Silent audio detected'),
t.ok(Array.isArray(outputEvents[0].output), 'Output should be wrapped in array')
const transcript = outputEvents[0].output[0]
t.ok(transcript.text.includes('Mock transcription') ||
transcript.text.includes('Silent audio detected'),
'Should contain mock transcription or silence detection text')
}

Expand Down
16 changes: 16 additions & 0 deletions packages/qvac-lib-infer-whispercpp/whisper.js
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,22 @@ class WhisperInterface {

if (mappedEvent === 'Output') {
this._setState(state.PROCESSING)
if (this._outputCb != null) {
const isTranscriptArray = Array.isArray(data) && data.length > 0 &&
typeof data[0]?.text === 'string'
const isSingleTranscript = !Array.isArray(data) &&
data && typeof data === 'object' && typeof data.text === 'string'
if (isTranscriptArray) {
for (const segment of data) {
this._outputCb(addon, 'Output', jobId, [segment], null)
}
} else if (isSingleTranscript) {
this._outputCb(addon, 'Output', jobId, [data], null)
} else {
this._outputCb(addon, 'Output', jobId, data, null)
}
}
return
}

if (this._outputCb != null) {
Expand Down
Loading