Skip to content

Commit 8bf6234

Browse files
authored
refactor: improve interruption detection word count logic (#809)
1 parent 2335196 commit 8bf6234

File tree

2 files changed

+180
-8
lines changed

2 files changed

+180
-8
lines changed

agents/src/voice/agent_activity.ts

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -625,11 +625,21 @@ export class AgentActivity implements RecognitionHooks {
625625
return;
626626
}
627627

628+
// Refactored interruption word count check:
629+
// - Apply minInterruptionWords filtering whenever STT and audioRecognition are available and minInterruptionWords > 0
630+
// - Apply check to all STT results: empty string, undefined, or any length
631+
// - This ensures consistent behavior across all interruption scenarios
628632
if (this.stt && this.agentSession.options.minInterruptionWords > 0 && this.audioRecognition) {
629633
const text = this.audioRecognition.currentTranscript;
630-
631634
// TODO(shubhra): better word splitting for multi-language
632-
if (text && splitWords(text, true).length < this.agentSession.options.minInterruptionWords) {
635+
636+
// Normalize text: convert undefined/null to empty string for consistent word counting
637+
const normalizedText = text ?? '';
638+
const wordCount = splitWords(normalizedText, true).length;
639+
640+
// Only allow interruption if word count meets or exceeds minInterruptionWords
641+
// This applies to all cases: empty strings, partial speech, and full speech
642+
if (wordCount < this.agentSession.options.minInterruptionWords) {
633643
return;
634644
}
635645
}
@@ -767,19 +777,30 @@ export class AgentActivity implements RecognitionHooks {
767777
return true;
768778
}
769779

780+
// Refactored interruption word count check for consistency with onVADInferenceDone:
781+
// - Apply minInterruptionWords filtering when STT is available, turn detection is not manual, the current speech is interruptible and not yet interrupted, and minInterruptionWords > 0
782+
// - Use consistent word splitting logic with splitWords (matching onVADInferenceDone pattern)
770783
if (
771784
this.stt &&
772785
this.turnDetection !== 'manual' &&
773786
this._currentSpeech &&
774787
this._currentSpeech.allowInterruptions &&
775788
!this._currentSpeech.interrupted &&
776-
this.agentSession.options.minInterruptionWords > 0 &&
777-
info.newTranscript.split(' ').length < this.agentSession.options.minInterruptionWords
789+
this.agentSession.options.minInterruptionWords > 0
778790
) {
779-
// avoid interruption if the new_transcript is too short
780-
this.cancelPreemptiveGeneration();
781-
this.logger.info('skipping user input, new_transcript is too short');
782-
return false;
791+
const wordCount = splitWords(info.newTranscript, true).length;
792+
if (wordCount < this.agentSession.options.minInterruptionWords) {
793+
// avoid interruption if the new_transcript contains fewer words than minInterruptionWords
794+
this.cancelPreemptiveGeneration();
795+
this.logger.info(
796+
{
797+
wordCount,
798+
minInterruptionWords: this.agentSession.options.minInterruptionWords,
799+
},
800+
'skipping user input, word count below minimum interruption threshold',
801+
);
802+
return false;
803+
}
783804
}
784805

785806
const oldTask = this._userTurnCompletedTask;
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
/**
6+
* Unit tests for the word-counting logic behind interruption detection in AgentActivity (exercised through splitWords, not a live AgentActivity instance).
7+
*
8+
* Tests the refactored minInterruptionWords check which ensures:
9+
* - Consistent word count filtering across all speech scenarios
10+
* - Proper handling of empty strings, undefined, and short speech
11+
* - Interruptions allowed only when word count meets or exceeds minInterruptionWords threshold
12+
*/
13+
import { describe, expect, it } from 'vitest';
14+
import { splitWords } from '../tokenize/basic/word.js';
15+
16+
describe('Interruption Detection - Word Counting', () => {
17+
describe('Word Splitting Behavior', () => {
18+
it('should count empty string as 0 words', () => {
19+
const text = '';
20+
const wordCount = splitWords(text, true).length;
21+
expect(wordCount).toBe(0);
22+
});
23+
24+
it('should count single word correctly', () => {
25+
const text = 'hello';
26+
const wordCount = splitWords(text, true).length;
27+
expect(wordCount).toBe(1);
28+
});
29+
30+
it('should count two words correctly', () => {
31+
const text = 'hello world';
32+
const wordCount = splitWords(text, true).length;
33+
expect(wordCount).toBe(2);
34+
});
35+
36+
it('should count multiple words correctly', () => {
37+
const text = 'hello this is a full sentence';
38+
const wordCount = splitWords(text, true).length;
39+
expect(wordCount).toBe(6);
40+
});
41+
42+
it('should handle punctuation correctly', () => {
43+
const text = 'hello, world!';
44+
const wordCount = splitWords(text, true).length;
45+
expect(wordCount).toBe(2);
46+
});
47+
48+
it('should handle multiple spaces between words', () => {
49+
const text = 'hello   world';
50+
const wordCount = splitWords(text, true).length;
51+
expect(wordCount).toBe(2);
52+
});
53+
54+
it('should count whitespace-only string as 0 words', () => {
55+
const text = ' ';
56+
const wordCount = splitWords(text, true).length;
57+
expect(wordCount).toBe(0);
58+
});
59+
60+
it('should handle leading and trailing whitespace', () => {
61+
const text = ' hello world ';
62+
const wordCount = splitWords(text, true).length;
63+
expect(wordCount).toBe(2);
64+
});
65+
});
66+
67+
describe('Integration: Full Interruption Check Logic', () => {
68+
it('should block interruption for empty transcript with threshold 2', () => {
69+
const text = '';
70+
const minInterruptionWords = 2;
71+
72+
const normalizedText = text ?? '';
73+
const wordCount = splitWords(normalizedText, true).length;
74+
const shouldBlock = wordCount < minInterruptionWords;
75+
76+
expect(normalizedText).toBe('');
77+
expect(wordCount).toBe(0);
78+
expect(shouldBlock).toBe(true);
79+
});
80+
81+
it('should block interruption for undefined transcript with threshold 2', () => {
82+
const text: string | undefined = undefined;
83+
const minInterruptionWords = 2;
84+
85+
const normalizedText = text ?? '';
86+
const wordCount = splitWords(normalizedText, true).length;
87+
const shouldBlock = wordCount < minInterruptionWords;
88+
89+
expect(normalizedText).toBe('');
90+
expect(wordCount).toBe(0);
91+
expect(shouldBlock).toBe(true);
92+
});
93+
94+
it('should block interruption for single word with threshold 2', () => {
95+
const text = 'hello';
96+
const minInterruptionWords = 2;
97+
98+
const normalizedText = text ?? '';
99+
const wordCount = splitWords(normalizedText, true).length;
100+
const shouldBlock = wordCount < minInterruptionWords;
101+
102+
expect(normalizedText).toBe('hello');
103+
expect(wordCount).toBe(1);
104+
expect(shouldBlock).toBe(true);
105+
});
106+
107+
it('should allow interruption when word count exactly meets threshold', () => {
108+
const text = 'hello world';
109+
const minInterruptionWords = 2;
110+
111+
const normalizedText = text ?? '';
112+
const wordCount = splitWords(normalizedText, true).length;
113+
const shouldBlock = wordCount < minInterruptionWords;
114+
115+
expect(normalizedText).toBe('hello world');
116+
expect(wordCount).toBe(2);
117+
expect(shouldBlock).toBe(false);
118+
});
119+
120+
it('should allow interruption when word count exceeds threshold', () => {
121+
const text = 'hello this is a full sentence';
122+
const minInterruptionWords = 2;
123+
124+
const normalizedText = text ?? '';
125+
const wordCount = splitWords(normalizedText, true).length;
126+
const shouldBlock = wordCount < minInterruptionWords;
127+
128+
expect(normalizedText).toBe('hello this is a full sentence');
129+
expect(wordCount).toBe(6);
130+
expect(shouldBlock).toBe(false);
131+
});
132+
133+
it('should apply consistent word counting logic in both methods', () => {
134+
const transcripts = ['', 'hello', 'hello world', 'this is a longer sentence'];
135+
const threshold = 2;
136+
137+
transcripts.forEach((transcript) => {
138+
const text1 = transcript;
139+
const normalizedText1 = text1 ?? '';
140+
const wordCount1 = splitWords(normalizedText1, true).length;
141+
const shouldBlock1 = wordCount1 < threshold;
142+
143+
const wordCount2 = splitWords(transcript, true).length;
144+
const shouldBlock2 = wordCount2 < threshold;
145+
146+
expect(wordCount1).toBe(wordCount2);
147+
expect(shouldBlock1).toBe(shouldBlock2);
148+
});
149+
});
150+
});
151+
});

0 commit comments

Comments
 (0)