tetherto · GustavoA1604 · Apr 15, 2026 · Apr 15, 2026
@@ -5,6 +5,17 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.6.2]
+
+### Changed
+- Fixed a chunking issue re-introduced in 0.6.0 in which the inference output was not streamed but was instead returned as a single batched result at the end.
+
+## [0.6.1]
+
+### Changed
+
+- Changed `@qvac/transcription-whispercpp` package visibility on npm from private to public.
+
 ## [0.6.0]
 
 This release is a significant interface modernisation. The constructor switches to a local-files map, model download is removed from the load path, concurrent inference runs are serialised instead of rejected, and the class no longer extends `BaseInference`.
@@ -52,6 +63,11 @@ When `exclusiveRun` is enabled (the default), a second call to `run()` or `runSt
 
 `TranscriptionWhispercppFiles` and `InferenceClientState` are now exported from the `TranscriptionWhispercpp` namespace. Lifecycle methods (`load`, `unload`, `destroy`, `cancel`, `pause`, `unpause`, `stop`, `status`, `getState`) are now explicitly declared in `index.d.ts`.
 
+## [0.5.6]
+
+### Changed
+- Fixed a chunking issue introduced in 0.5.0 in which the inference output was not streamed but was instead returned as a single batched result at the end.
+
 ## [0.5.5]
 
 ### Changed

@@ -155,7 +155,14 @@ inline js_value_t* runJob(js_env_t* env, js_callback_info_t* info) try {
   vector<uint8_t> audioBytes =
       js::TypedArray<uint8_t>(env, jsInput).as<std::vector<uint8_t>>(env);
   auto samples = WhisperModel::preprocessAudioData(audioBytes, audioFormat);
-  return instance.runJob(std::any(std::move(samples)));
+
+  WhisperModel::AnyInput anyInput;
+  anyInput.input = std::move(samples);
+  anyInput.outputCallback = [&instance](const Transcript& transcript) {
+    instance.addonCpp->outputQueue->queueResult(std::any(transcript));
+  };
+
+  return instance.runJob(std::any(std::move(anyInput)));
 }
 JSCATCH
 

@@ -389,6 +389,7 @@ std::any WhisperModel::process(const std::any& input) {
 
   if (shouldOverrideCallback) {
     on_segment_ = previousOutputCallback;
+    return Output{};
   }
 
   return output_;

@@ -1,6 +1,6 @@
 {
   "name": "@qvac/transcription-whispercpp",
-  "version": "0.6.0",
+  "version": "0.6.2",
   "description": "transcription addon for qvac",
   "addon": true,
   "engines": {

@@ -14,19 +14,21 @@ async function transcribeChunk (model, audioStream, offsetMs, durationMs, audioC
     }
   })
 
-  // audioStream is provided by caller to avoid reading the whole file inside this function
-
   const response = await model.run(audioStream)
 
   const results = []
+  let updateCallCount = 0
+  let maxBatchSize = 0
   response.onUpdate((outputArr) => {
+    updateCallCount++
     const items = Array.isArray(outputArr) ? outputArr : [outputArr]
+    if (items.length > maxBatchSize) maxBatchSize = items.length
     results.push(...items)
   })
 
   await response.await()
 
-  return results
+  return { results, updateCallCount, maxBatchSize }
 }
 
 const { modelPath } = getTestPaths()
@@ -99,6 +101,7 @@ test('Audio context chunking - 10 minute audio file with 30s chunks', { skip: is
     const allResults = []
     let errorCount = 0
     let chunksWithSegments = 0
+    let batchedDeliveryCount = 0
 
     // Process each chunk - always pass full audio, only change offset_ms, duration_ms, audio_ctx
     let currentOffsetSeconds = 0
@@ -110,9 +113,9 @@ test('Audio context chunking - 10 minute audio file with 30s chunks', { skip: is
 
       const fullAudioStream = createAudioStream(fullAudioBuffer)
 
-      let results = []
+      let chunk = { results: [], updateCallCount: 0, maxBatchSize: 0 }
       try {
-        results = await transcribeChunk(
+        chunk = await transcribeChunk(
           model,
           fullAudioStream,
           currentOffsetSeconds * 1000,
@@ -126,11 +129,16 @@ test('Audio context chunking - 10 minute audio file with 30s chunks', { skip: is
 
       currentOffsetSeconds += chunkDuration
 
-      if (results.length > 0) {
-        const text = results.map(s => s.text).join(' ').replace(/\s+/g, ' ').trim()
+      if (chunk.results.length > 0) {
+        const text = chunk.results.map(s => s.text).join(' ').replace(/\s+/g, ' ').trim()
+        console.log(`  → segments=${chunk.results.length} updates=${chunk.updateCallCount} maxBatch=${chunk.maxBatchSize}`)
         console.log(`  → ${text}\n`)
-        allResults.push(...results)
+        allResults.push(...chunk.results)
         chunksWithSegments++
+
+        if (chunk.results.length > 1 && chunk.updateCallCount === 1) {
+          batchedDeliveryCount++
+        }
       } else {
         console.log('  → [no output]\n')
       }
@@ -141,13 +149,19 @@ test('Audio context chunking - 10 minute audio file with 30s chunks', { skip: is
     console.log(`Total chunks processed: ${totalChunks}`)
     console.log(`Chunks with segments: ${chunksWithSegments}`)
     console.log(`Chunk errors: ${errorCount}`)
+    console.log(`Batched deliveries (regression): ${batchedDeliveryCount}`)
     console.log(`Duration processed: ${totalDurationSeconds.toFixed(1)}s`)
 
     // Assertions
     t.ok(allResults.length > 0, 'Should produce transcription segments')
     t.is(chunksWithSegments, totalChunks, 'Should transcribe exactly totalChunks chunks')
     t.is(errorCount, 0, 'No chunk errors or exceptions')
 
+    t.is(
+      batchedDeliveryCount, 0,
+      'Segments must be streamed incrementally (not all batched into a single onUpdate call)'
+    )
+
     // Verify segments have required properties
     if (allResults.length > 0) {
       const firstSegment = allResults[0]

@@ -117,7 +117,7 @@ test('Streaming transcript output preserves segment ordering', async (t) => {
 
   const outputEvents = events.filter(e => e.event === 'Output' && e.jobId === 1)
   t.alike(
-    outputEvents.map(e => e.output.text),
+    outputEvents.map(e => e.output[0].text),
     ['segment-0', 'segment-1', 'segment-2'],
     'Output segments should keep original ordering'
   )

@@ -78,9 +78,10 @@ test('VAD mode processes audio with voice activity detection', async (t) => {
 
   if (outputEvents.length > 0) {
     t.ok(outputEvents[0].output, 'Should have transcription output')
-    t.is(typeof outputEvents[0].output, 'object', 'Output should be transcript object')
-    t.ok(outputEvents[0].output.text.includes('Mock transcription') ||
-      outputEvents[0].output.text.includes('Silent audio detected'),
+    t.ok(Array.isArray(outputEvents[0].output), 'Output should be wrapped in array')
+    const transcript = outputEvents[0].output[0]
+    t.ok(transcript.text.includes('Mock transcription') ||
+      transcript.text.includes('Silent audio detected'),
     'Should contain mock transcription or silence detection text')
   }
 

@@ -90,6 +90,22 @@ class WhisperInterface {
 
     if (mappedEvent === 'Output') {
       this._setState(state.PROCESSING)
+      if (this._outputCb != null) {
+        const isTranscriptArray = Array.isArray(data) && data.length > 0 &&
+          typeof data[0]?.text === 'string'
+        const isSingleTranscript = !Array.isArray(data) &&
+          data && typeof data === 'object' && typeof data.text === 'string'
+        if (isTranscriptArray) {
+          for (const segment of data) {
+            this._outputCb(addon, 'Output', jobId, [segment], null)
+          }
+        } else if (isSingleTranscript) {
+          this._outputCb(addon, 'Output', jobId, [data], null)
+        } else {
+          this._outputCb(addon, 'Output', jobId, data, null)
+        }
+      }
+      return
     }
 
     if (this._outputCb != null) {