Skip to content

Commit

Permalink
Fix crash and silence (#70)
Browse files Browse the repository at this point in the history
* Fix thread crash by waiting for the thread to finish.

* Tentative fix for silence: when no voice activity is detected, skip the iteration and do not emit a transcription.

* Update register_types.cpp

* upd
  • Loading branch information
Ughuuu authored Apr 18, 2024
1 parent bd9c18a commit c3682d7
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 7 deletions.
16 changes: 14 additions & 2 deletions bin/addons/godot_whisper/capture_stream_to_text.gd
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ func _get_configuration_warnings():
recording = value
if recording:
_ready()
else:
thread.wait_to_finish()
get:
return recording
## The interval at which transcribing is done. Use a value larger than the time it takes to transcribe (e.g. this depends on the model used).
Expand Down Expand Up @@ -47,6 +49,9 @@ var thread : Thread
func _ready():
if Engine.is_editor_hint():
return
if thread && thread.is_alive():
recording = false
thread.wait_to_finish()
thread = Thread.new()
_effect_capture.clear_buffer()
thread.start(transcribe_thread)
Expand All @@ -62,6 +67,10 @@ func transcribe_thread():
if resampled.size() <= 0:
OS.delay_msec(transcribe_interval * 1000)
continue
var no_activity := voice_activity_detection(resampled)
#if no_activity:
#print("no activity")
#continue
var total_time : float= (resampled.size() as float) / SpeechToText.SPEECH_SETTING_SAMPLE_RATE
var audio_ctx : int = total_time * 1500 / 30 + 128
if !use_dynamic_audio_context:
Expand All @@ -75,7 +84,6 @@ func transcribe_thread():
var finish_sentence = false
if total_time > maximum_sentence_time:
finish_sentence = true
var no_activity := voice_activity_detection(resampled)
var text : String
for token in tokens:
text += token["text"]
Expand All @@ -84,10 +92,14 @@ func transcribe_thread():
finish_sentence = true
if total_time < minimum_sentence_time || abs(tokens.size() - last_token_count) > halucinating_count:
finish_sentence = false
var time_processing = (Time.get_ticks_msec() - start_time)
if no_activity:
#_accumulated_frames = []
continue
if finish_sentence:
_accumulated_frames = _accumulated_frames.slice(_accumulated_frames.size() - (0.2 * mix_rate))
#if !no_activity:
call_deferred("emit_signal", "transcribed_msg", finish_sentence, full_text)
var time_processing = (Time.get_ticks_msec() - start_time)
last_token_count = tokens.size()
#print(text)
print(full_text)
Expand Down
3 changes: 2 additions & 1 deletion bin/project.godot
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ config/icon="res://icon.png"
[audio]

driver/enable_input=true
driver/mix_rate=48000
enable_audio_input=true
mix_rate=48000
input/transcribe/max_tokens=64
input/transcribe/vad_treshold=4.0

[display]

Expand Down
1 change: 0 additions & 1 deletion scripts/build-dev.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
scons target=template_debug generate_bindings=no arch=universal dev_build=yes
#scons target=template_release generate_bindings=no arch=universal precision=single
rm -rf demo/addons/godot_whisper/bin
cp -rf bin/addons/godot_whisper/bin demo/addons/godot_whisper/bin

1 change: 0 additions & 1 deletion scripts/build.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
scons target=template_release generate_bindings=no arch=universal precision=single
rm -rf samples/godot_whisper/addons/godot_whisper/bin
cp -rf bin/addons/godot_whisper/bin samples/godot_whisper/addons/godot_whisper/bin

4 changes: 2 additions & 2 deletions src/register_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ void initialize_whisper_module(ModuleInitializationLevel p_level) {

// register settings
register_setting("audio/input/transcribe/entropy_treshold", 2.8, PROPERTY_HINT_NONE, {});
register_setting("audio/input/transcribe/freq_treshold", 200, PROPERTY_HINT_NONE, {});
register_setting("audio/input/transcribe/freq_treshold", 200.0, PROPERTY_HINT_NONE, {});
register_setting("audio/input/transcribe/max_tokens", 16, PROPERTY_HINT_NONE, {});
register_setting("audio/input/transcribe/vad_treshold", 0.3, PROPERTY_HINT_NONE, {});
register_setting("audio/input/transcribe/vad_treshold", 2.0, PROPERTY_HINT_NONE, {});
register_setting("audio/input/transcribe/use_gpu", true, PROPERTY_HINT_NONE, {});
register_setting("audio/input/transcribe/speed_up_2x", false, PROPERTY_HINT_NONE, {});
}
Expand Down

0 comments on commit c3682d7

Please sign in to comment.