Skip to content

Commit 461de22

Browse files
committed
Fixed audio artifacts for text to speech
1 parent 4c17899 commit 461de22

File tree

2 files changed

+7
-3
lines changed

2 files changed

+7
-3
lines changed

src/main/java/example/ExampleBot.java

+1 -1
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ public ExampleBot() {
3232

3333
// Windows (with closed captioning instead of wake detection)
3434
cord = VocalCord.newConfig(this).withClosedCaptioning().withTTS(SsmlVoiceGender.MALE,
35-
true).build();
35+
false).build();
3636

3737
// Linux (using WSL)
3838
// cord = VocalCord.newConfig(this).withWakeDetection("/mnt/c/Users/wdavi/IdeaProjects/VocalCord/native/linux/libjni_porcupine.so",

src/main/java/vocalcord/TTSEngine.java

+6 -2
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,10 @@ byte[] tts(String text) throws Exception {
4848
byte[] pcm = audioContents.toByteArray();
4949

5050
// Three things need to happen - big endian, stereo, pad to a multiple of 3840
51-
byte[] converted = new byte[pcm.length * 2 + (AUDIO_FRAME - pcm.length * 2 % AUDIO_FRAME)]; // ensures converted is a multiple of AUDIO_FRAME
52-
for(int i = 0; i < pcm.length; i += 2) {
51+
// Add a frame of silence at the beginning so that the sound doesn't clip weirdly
52+
byte[] converted = new byte[AUDIO_FRAME + pcm.length * 2 + (AUDIO_FRAME - pcm.length * 2 % AUDIO_FRAME)];
53+
// ensures converted is a multiple of AUDIO_FRAME
54+
for(int i = AUDIO_FRAME; i < pcm.length; i += 2) {
5355
short reversed = Short.reverseBytes((short) ((pcm[i] << 8) | (pcm[i + 1] & 0xFF)));
5456
byte low = (byte) (reversed >> 8);
5557
byte high = (byte) (reversed & 0x00FF);
@@ -66,6 +68,8 @@ byte[] tts(String text) throws Exception {
6668
}
6769

6870
void say(String phrase) throws Exception {
71+
this.index = Integer.MAX_VALUE;
72+
6973
if(ttsCache != null) {
7074
TTSCache.CacheResponse response = ttsCache.checkCache(phrase);
7175

0 commit comments

Comments
 (0)