diff --git a/.github/workflows/windows_builds.yml b/.github/workflows/windows_builds.yml index c7d94b82eaa4..f8776b4c1157 100644 --- a/.github/workflows/windows_builds.yml +++ b/.github/workflows/windows_builds.yml @@ -93,6 +93,11 @@ jobs: echo "ANGLE_ENABLED=no" >> "$GITHUB_OUTPUT" fi + - name: Download WinRT components + shell: sh + run: python ./misc/scripts/install_winrt.py + continue-on-error: true + - name: Download pre-built AccessKit shell: sh id: accesskit-sdk diff --git a/core/os/spin_lock.h b/core/os/spin_lock.h index 7a5051035373..e8e7833f6f81 100644 --- a/core/os/spin_lock.h +++ b/core/os/spin_lock.h @@ -93,7 +93,11 @@ static_assert(std::atomic_bool::is_always_lock_free); class SpinLock { union { +#if __cplusplus >= 202002L + mutable std::atomic locked = false; +#else mutable std::atomic locked = ATOMIC_VAR_INIT(false); +#endif char aligner[Thread::CACHE_LINE_BYTES]; }; diff --git a/misc/scripts/install_winrt.py b/misc/scripts/install_winrt.py new file mode 100755 index 000000000000..ce69929cb4e9 --- /dev/null +++ b/misc/scripts/install_winrt.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 + +if __name__ != "__main__": + raise SystemExit(f'Utility script "{__file__}" should not be used as a module!') + +import os +import shutil +import sys +import urllib.request + +sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../")) + + +# Base Godot dependencies path +# If cross-compiling (no LOCALAPPDATA), we install in `bin` +deps_folder = os.getenv("LOCALAPPDATA") +if deps_folder: + deps_folder = os.path.join(deps_folder, "Godot", "build_deps") +else: + deps_folder = os.path.join("bin", "build_deps") + +# WinRT +winrt_version = "72" + +# Create dependencies folder +if not os.path.exists(deps_folder): + os.makedirs(deps_folder) + +winrt_filename = "winrt-headers.zip" +winrt_archive = os.path.join(deps_folder, winrt_filename) +winrt_folder = os.path.join(deps_folder, "winrt_mingw") + +if os.path.isfile(winrt_archive): + 
os.remove(winrt_archive) + +print(f"Downloading WinRT {winrt_filename} ...") +urllib.request.urlretrieve( + f"https://github.com/bruvzg/winrt_mingw/releases/download/{winrt_version}/{winrt_filename}", + winrt_archive, +) +if os.path.exists(winrt_folder): + print(f"Removing existing local WinRT installation in {winrt_folder} ...") + shutil.rmtree(winrt_folder) +print(f"Extracting WinRT {winrt_filename} to {winrt_folder} ...") +shutil.unpack_archive(winrt_archive, winrt_folder) +os.remove(winrt_archive) + +print("WinRT installed successfully.\n") diff --git a/platform/windows/SCsub b/platform/windows/SCsub index 8c24ecf71aac..cd9e9d52831a 100644 --- a/platform/windows/SCsub +++ b/platform/windows/SCsub @@ -16,7 +16,6 @@ common_win = [ "os_windows.cpp", "display_server_windows.cpp", "key_mapping_windows.cpp", - "tts_windows.cpp", "windows_terminal_logger.cpp", "windows_utils.cpp", "native_menu_windows.cpp", @@ -81,6 +80,30 @@ res_obj = env.RES(res_target, res_file) env.Depends(res_obj, "#core/version_generated.gen.h") env.add_source_files(sources, common_win) + +env_winrt = env.Clone() +if not env_winrt.msvc: + if "-std=gnu++17" in env_winrt["CXXFLAGS"]: + env_winrt["CXXFLAGS"].remove("-std=gnu++17") + env_winrt.Append(CXXFLAGS=["-std=gnu++20"]) + if "-fno-exceptions" in env_winrt["CXXFLAGS"]: + env_winrt["CXXFLAGS"].remove("-fno-exceptions") + env_winrt.Append(CXXFLAGS=["-fexceptions"]) +else: + if "/std:c++17" in env_winrt["CXXFLAGS"]: + env_winrt["CXXFLAGS"].remove("/std:c++17") + env_winrt.Append(CXXFLAGS=["/std:c++20"]) + if "_HAS_EXCEPTIONS" in env_winrt["CPPDEFINES"]: + env_winrt["CPPDEFINES"].remove("_HAS_EXCEPTIONS") + env_winrt.Append(CXXFLAGS=["/EHsc"]) +tts_sources = ["tts_windows.cpp", "tts_driver_sapi.cpp"] +if env_winrt["winrt_path"] != "" or env_winrt.msvc: + if not env_winrt.msvc: + env_winrt.Append(CPPPATH=[env["winrt_path"]]) + env_winrt.AppendUnique(CPPDEFINES=["WINRT_ENABLED"]) + tts_sources += ["tts_driver_onecore.cpp"] 
+env_winrt.add_source_files(sources, tts_sources) + sources += res_obj if env["accesskit"] and not env.msvc: diff --git a/platform/windows/detect.py b/platform/windows/detect.py index e49bb29067c7..6453c60bb365 100644 --- a/platform/windows/detect.py +++ b/platform/windows/detect.py @@ -200,11 +200,18 @@ def get_opts(): "Path to the AccessKit C SDK", os.path.join(deps_folder, "accesskit"), ), + # OpenGL over Direct3D 11. ( "angle_libs", "Path to the ANGLE static libraries", os.path.join(deps_folder, "angle"), ), + # WinRT. + ( + "winrt_path", + "Path to the WinRT headers", + os.path.join(deps_folder, "winrt_mingw"), + ), # Direct3D 12 support. ( "mesa_libs", @@ -418,6 +425,7 @@ def spawn_capture(sh, escape, cmd, args, env): "wbemuuid", "ntdll", "hid", + "mincore", ] if env.debug_features: @@ -811,6 +819,7 @@ def configure_mingw(env: "SConsEnvironment"): "wbemuuid", "ntdll", "hid", + "mincore", ] ) diff --git a/platform/windows/tts_driver.h b/platform/windows/tts_driver.h new file mode 100644 index 000000000000..3cef44619683 --- /dev/null +++ b/platform/windows/tts_driver.h @@ -0,0 +1,53 @@ +/**************************************************************************/ +/* tts_driver.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#pragma once + +#include "core/object/object.h" + +class TTSDriver : public Object { + GDSOFTCLASS(TTSDriver, Object); + +public: + virtual bool is_speaking() const = 0; + virtual bool is_paused() const = 0; + virtual Array get_voices() const = 0; + + virtual void speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int64_t p_utterance_id = 0, bool p_interrupt = false) = 0; + virtual void pause() = 0; + virtual void resume() = 0; + virtual void stop() = 0; + + virtual void process_events() = 0; + + virtual bool init() = 0; + + virtual ~TTSDriver() {} +}; diff --git a/platform/windows/tts_driver_onecore.cpp b/platform/windows/tts_driver_onecore.cpp new file mode 100644 index 000000000000..a456af67ddb8 --- /dev/null +++ b/platform/windows/tts_driver_onecore.cpp @@ -0,0 +1,267 @@ +/**************************************************************************/ +/* tts_driver_onecore.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#include "tts_driver_onecore.h" + +#include "core/object/callable_mp.h" +#include "servers/display/display_server.h" + +TTSDriverOneCore *TTSDriverOneCore::singleton = nullptr; + +void TTSDriverOneCore::_speech_index_mark(int p_msg_id, int p_index_mark) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_BOUNDARY, p_msg_id, p_index_mark); +} + +void TTSDriverOneCore::_speech_cancel(int p_msg_id) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, p_msg_id); +} + +void TTSDriverOneCore::_speech_end(int p_msg_id) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_ENDED, p_msg_id); +} + +void TTSDriverOneCore::_dispose_current(bool p_silent, bool p_canceled) { + if (media.get() != nullptr) { + for (const TrackData &T : tracks) { + T.track.CueEntered(T.token); + } + tracks.clear(); + media->MediaFailed(singleton->token_f); + media->MediaEnded(singleton->token_e); + if (!ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 4)) { + media->PlaybackMediaMarkerReached(singleton->token_s); + } + media->Close(); + media.reset(); + + if (!p_silent) { + if (p_canceled) { + callable_mp(this, &TTSDriverOneCore::_speech_cancel).call_deferred(id); + } else { + callable_mp(this, &TTSDriverOneCore::_speech_end).call_deferred(id); + } + } + id = -1; + string = Char16String(); + playing = false; + paused = false; + offset = 0; + } +} + +void TTSDriverOneCore::process_events() { + if (update_requested && !paused && queue.size() > 0 && !is_speaking()) { + TTSUtterance &message = queue.front()->get(); + _dispose_current(true); + playing = true; + + SpeechSynthesizer synth = SpeechSynthesizer(); + + if (ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 4)) { + synth.Options().IncludeWordBoundaryMetadata(true); + 
} + if (ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 5)) { + synth.Options().SpeakingRate(CLAMP(message.rate, 0.5, 6.0)); + synth.Options().AudioPitch(CLAMP(message.pitch, 0.0, 2.0)); + synth.Options().AudioVolume(CLAMP((double)message.volume / 100.0, 0.0, 1.0)); + } + + winrt::hstring name = winrt::hstring((const wchar_t *)message.voice.utf16().get_data()); + IVectorView voices = SpeechSynthesizer::AllVoices(); + for (uint32_t i = 0; i < voices.Size(); i++) { + VoiceInformation voice = voices.GetAt(i); + if (voice.Id() == name) { + synth.Voice(voice); + break; + } + } + + string = message.text.utf16(); + winrt::hstring text = winrt::hstring((const wchar_t *)string.get_data()); + + SpeechSynthesisStream stream = synth.SynthesizeTextToStreamAsync(text).get(); + + media = std::make_shared(); + token_f = media->MediaFailed([=, this](const MediaPlayer &p_sender, const MediaPlayerFailedEventArgs &p_args) { + _dispose_current(false, true); + }); + token_e = media->MediaEnded([=, this](const MediaPlayer &p_sender, const IInspectable &p_args) { + _dispose_current(false, false); + }); + if (ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 4)) { + MediaPlaybackItem mitem = MediaPlaybackItem(MediaSource::CreateFromStream(stream, stream.ContentType())); + media->Source(mitem); + MediaPlaybackTimedMetadataTrackList list = mitem.TimedMetadataTracks(); + + for (uint32_t i = 0; i < list.Size(); i++) { + TimedMetadataTrack track = list.GetAt(i); + if (track.TimedMetadataKind() == TimedMetadataKind::Speech) { + winrt::event_token token = track.CueEntered([=, this](const TimedMetadataTrack &p_sender, const MediaCueEventArgs &p_args) { + SpeechCue sq; + p_args.Cue().as(sq); + int32_t pos16 = sq.StartPositionInInput().Value(); + int pos = 0; + for (int j = 0; j < MIN(pos16, string.length()); j++) { + char16_t c = string[j]; + if ((c & 0xfffffc00) == 0xd800) { + j++; + } + pos++; + } + callable_mp(singleton, 
&TTSDriverOneCore::_speech_index_mark).call_deferred(id, pos); + }); + tracks.push_back({ track, token }); + list.SetPresentationMode(i, TimedMetadataTrackPresentationMode::ApplicationPresented); + } + } + } else { + media->Source(MediaSource::CreateFromStream(stream, stream.ContentType())); + token_s = media->PlaybackMediaMarkerReached([=, this](const MediaPlayer &p_sender, const PlaybackMediaMarkerReachedEventArgs &p_args) { + offset += p_args.PlaybackMediaMarker().Text().size() + 1; + int pos = 0; + for (int j = 0; j < MIN(offset, string.length()); j++) { + char16_t c = string[j]; + if ((c & 0xfffffc00) == 0xd800) { + j++; + } + pos++; + } + callable_mp(singleton, &TTSDriverOneCore::_speech_index_mark).call_deferred(id, pos); + }); + } + media->AutoPlay(true); + + id = message.id; + update_requested = false; + paused = false; + + media->Play(); + + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_STARTED, message.id); + queue.pop_front(); + } +} + +bool TTSDriverOneCore::is_speaking() const { + return playing; +} + +bool TTSDriverOneCore::is_paused() const { + return paused; +} + +Array TTSDriverOneCore::get_voices() const { + Array list; + + IVectorView voices = SpeechSynthesizer::AllVoices(); + for (uint32_t i = 0; i < voices.Size(); i++) { + VoiceInformation voice = voices.GetAt(i); + winrt::hstring vname = voice.DisplayName(); + winrt::hstring vid = voice.Id(); + winrt::hstring vlang = voice.Language(); + + Dictionary voice_d; + voice_d["id"] = String::utf16((const char16_t *)vid.c_str(), vid.size()); + voice_d["name"] = String::utf16((const char16_t *)vname.c_str(), vname.size()); + voice_d["language"] = String::utf16((const char16_t *)vlang.c_str(), vlang.size()); + list.push_back(voice_d); + } + return list; +} + +void TTSDriverOneCore::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int64_t p_utterance_id, bool p_interrupt) { + if (p_interrupt) { + stop(); + } + + if 
(p_text.is_empty()) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, p_utterance_id); + return; + } + + TTSUtterance message; + message.text = p_text; + message.voice = p_voice; + message.volume = CLAMP(p_volume, 0, 100); + message.pitch = CLAMP(p_pitch, 0.f, 2.f); + message.rate = CLAMP(p_rate, 0.1f, 10.f); + message.id = p_utterance_id; + queue.push_back(message); + + if (is_paused()) { + resume(); + } else { + update_requested = true; + } +} + +void TTSDriverOneCore::pause() { + if (!paused && playing) { + media->Pause(); + paused = true; + } +} + +void TTSDriverOneCore::resume() { + if (paused && playing) { + media->Play(); + paused = false; + } +} + +void TTSDriverOneCore::stop() { + for (TTSUtterance &message : queue) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, message.id); + } + queue.clear(); + _dispose_current(false, true); +} + +bool TTSDriverOneCore::init() { + if (!ApiInformation::IsApiContractPresent(L"Windows.Foundation.UniversalApiContract", 1)) { + print_verbose("Text-to-Speech: Cannot initialize OneCore driver, API contract not present!"); + return false; + } + if (SpeechSynthesizer::AllVoices().Size() == 0) { + print_verbose("Text-to-Speech: Cannot initialize OneCore driver, no voices found!"); + return false; + } + print_verbose("Text-to-Speech: OneCore initialized."); + return true; +} + +TTSDriverOneCore::TTSDriverOneCore() { + singleton = this; +} + +TTSDriverOneCore::~TTSDriverOneCore() { + _dispose_current(false, true); + singleton = nullptr; +} diff --git a/platform/windows/tts_driver_onecore.h b/platform/windows/tts_driver_onecore.h new file mode 100644 index 000000000000..6cd8f181e231 --- /dev/null +++ b/platform/windows/tts_driver_onecore.h @@ -0,0 +1,107 @@ +/**************************************************************************/ +/* tts_driver_onecore.h */ 
+/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#pragma once + +#include "tts_driver.h" + +GODOT_GCC_WARNING_PUSH +GODOT_GCC_WARNING_IGNORE("-Wnon-virtual-dtor") +GODOT_GCC_WARNING_IGNORE("-Wctor-dtor-privacy") +GODOT_GCC_WARNING_IGNORE("-Wshadow") +GODOT_GCC_WARNING_IGNORE("-Wstrict-aliasing") +GODOT_CLANG_WARNING_PUSH +GODOT_CLANG_WARNING_IGNORE("-Wnon-virtual-dtor") + +#include +#include +#include +#include +#include +#include + +GODOT_GCC_WARNING_POP +GODOT_CLANG_WARNING_POP + +using namespace winrt::Windows::Foundation; +using namespace winrt::Windows::Foundation::Collections; +using namespace winrt::Windows::Foundation::Metadata; +using namespace winrt::Windows::Media::Core; +using namespace winrt::Windows::Media::Playback; +using namespace winrt::Windows::Media::SpeechSynthesis; +using namespace winrt::Windows::Storage::Streams; + +struct TTSUtterance; + +class TTSDriverOneCore : public TTSDriver { + List queue; + + bool playing = false; + bool paused = false; + bool update_requested = false; + + int64_t id = -1; + Char16String string; + std::shared_ptr media; + struct TrackData { + TimedMetadataTrack track; + winrt::event_token token{}; + }; + Vector tracks; + winrt::event_token token_s{}; + winrt::event_token token_f{}; + winrt::event_token token_e{}; + int64_t offset = 0; + + void _dispose_current(bool p_silent = false, bool p_canceled = false); + + void _speech_cancel(int p_msg_id); + void _speech_end(int p_msg_id); + void _speech_index_mark(int p_msg_id, int p_index_mark); + + static TTSDriverOneCore *singleton; + +public: + virtual bool is_speaking() const override; + virtual bool is_paused() const override; + virtual Array get_voices() const override; + + virtual void speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int64_t p_utterance_id = 0, bool p_interrupt = false) override; + virtual void pause() override; + virtual void resume() override; + virtual 
void stop() override; + + virtual void process_events() override; + + virtual bool init() override; + + TTSDriverOneCore(); + ~TTSDriverOneCore(); +}; diff --git a/platform/windows/tts_driver_sapi.cpp b/platform/windows/tts_driver_sapi.cpp new file mode 100644 index 000000000000..8e69d5b6865b --- /dev/null +++ b/platform/windows/tts_driver_sapi.cpp @@ -0,0 +1,274 @@ +/**************************************************************************/ +/* tts_driver_sapi.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#include "tts_driver_sapi.h" + +#include "core/object/callable_mp.h" +#include "servers/display/display_server.h" + +TTSDriverSAPI *TTSDriverSAPI::singleton = nullptr; + +void __stdcall TTSDriverSAPI::speech_event_callback(WPARAM wParam, LPARAM lParam) { + SPEVENT event; + while (singleton->synth->GetEvents(1, &event, nullptr) == S_OK) { + uint32_t stream_num = (uint32_t)event.ulStreamNum; + if (singleton->ids.has(stream_num)) { + if (event.eEventId == SPEI_START_INPUT_STREAM) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_STARTED, singleton->ids[stream_num].id); + } else if (event.eEventId == SPEI_END_INPUT_STREAM) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_ENDED, singleton->ids[stream_num].id); + singleton->ids.erase(stream_num); + singleton->update_requested = true; + } else if (event.eEventId == SPEI_WORD_BOUNDARY) { + const Char16String &string = singleton->ids[stream_num].string; + int pos = 0; + for (int i = 0; i < MIN(event.lParam, string.length()); i++) { + char16_t c = string[i]; + if ((c & 0xfffffc00) == 0xd800) { + i++; + } + pos++; + } + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_BOUNDARY, singleton->ids[stream_num].id, pos - singleton->ids[stream_num].offset); + } + } + } +} + +void TTSDriverSAPI::process_events() { + if (update_requested && !paused && queue.size() > 0 && !is_speaking()) { + TTSUtterance &message = queue.front()->get(); + + String text; + DWORD flags = SPF_ASYNC | SPF_PURGEBEFORESPEAK | SPF_IS_XML; + String pitch_tag = String(""); + text = pitch_tag + message.text + String(""); + + IEnumSpObjectTokens *cpEnum; + ISpObjectToken *cpVoiceToken; + ULONG ulCount = 0; + ULONG stream_number = 0; + ISpObjectTokenCategory *cpCategory; + HRESULT hr = CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, 
CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (void **)&cpCategory); + if (SUCCEEDED(hr)) { + hr = cpCategory->SetId(SPCAT_VOICES, false); + if (SUCCEEDED(hr)) { + hr = cpCategory->EnumTokens(nullptr, nullptr, &cpEnum); + if (SUCCEEDED(hr)) { + hr = cpEnum->GetCount(&ulCount); + while (SUCCEEDED(hr) && ulCount--) { + wchar_t *w_id = nullptr; + hr = cpEnum->Next(1, &cpVoiceToken, nullptr); + cpVoiceToken->GetId(&w_id); + if (String::utf16((const char16_t *)w_id) == message.voice) { + synth->SetVoice(cpVoiceToken); + cpVoiceToken->Release(); + break; + } + cpVoiceToken->Release(); + } + cpEnum->Release(); + } + } + cpCategory->Release(); + } + + UTData ut; + ut.string = text.utf16(); + ut.offset = pitch_tag.length(); // Subtract injected tag offset. + ut.id = message.id; + + synth->SetVolume(message.volume); + synth->SetRate(10.f * std::log10(message.rate) / std::log10(3.f)); + synth->Speak((LPCWSTR)ut.string.get_data(), flags, &stream_number); + + ids[(uint32_t)stream_number] = ut; + + queue.pop_front(); + + update_requested = false; + } +} + +bool TTSDriverSAPI::is_speaking() const { + ERR_FAIL_NULL_V(synth, false); + + SPVOICESTATUS status; + synth->GetStatus(&status, nullptr); + return (status.dwRunningState == SPRS_IS_SPEAKING || status.dwRunningState == 0 /* Waiting To Speak */); +} + +bool TTSDriverSAPI::is_paused() const { + ERR_FAIL_NULL_V(synth, false); + return paused; +} + +Array TTSDriverSAPI::get_voices() const { + Array list; + IEnumSpObjectTokens *cpEnum; + ISpObjectToken *cpVoiceToken; + ISpDataKey *cpDataKeyAttribs; + ULONG ulCount = 0; + ISpObjectTokenCategory *cpCategory; + HRESULT hr = CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (void **)&cpCategory); + if (SUCCEEDED(hr)) { + hr = cpCategory->SetId(SPCAT_VOICES, false); + if (SUCCEEDED(hr)) { + hr = cpCategory->EnumTokens(nullptr, nullptr, &cpEnum); + if (SUCCEEDED(hr)) { + hr = cpEnum->GetCount(&ulCount); + while (SUCCEEDED(hr) && 
ulCount--) { + hr = cpEnum->Next(1, &cpVoiceToken, nullptr); + HRESULT hr_attr = cpVoiceToken->OpenKey(SPTOKENKEY_ATTRIBUTES, &cpDataKeyAttribs); + if (SUCCEEDED(hr_attr)) { + wchar_t *w_id = nullptr; + wchar_t *w_lang = nullptr; + wchar_t *w_name = nullptr; + cpVoiceToken->GetId(&w_id); + cpDataKeyAttribs->GetStringValue(L"Language", &w_lang); + cpDataKeyAttribs->GetStringValue(nullptr, &w_name); + LCID locale = wcstol(w_lang, nullptr, 16); + + int locale_chars = GetLocaleInfoW(locale, LOCALE_SISO639LANGNAME, nullptr, 0); + int region_chars = GetLocaleInfoW(locale, LOCALE_SISO3166CTRYNAME, nullptr, 0); + wchar_t *w_lang_code = new wchar_t[locale_chars]; + wchar_t *w_reg_code = new wchar_t[region_chars]; + GetLocaleInfoW(locale, LOCALE_SISO639LANGNAME, w_lang_code, locale_chars); + GetLocaleInfoW(locale, LOCALE_SISO3166CTRYNAME, w_reg_code, region_chars); + + Dictionary voice_d; + voice_d["id"] = String::utf16((const char16_t *)w_id); + if (w_name) { + voice_d["name"] = String::utf16((const char16_t *)w_name); + } else { + voice_d["name"] = voice_d["id"].operator String().replace("HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech\\Voices\\Tokens\\", ""); + } + voice_d["language"] = String::utf16((const char16_t *)w_lang_code) + "_" + String::utf16((const char16_t *)w_reg_code); + list.push_back(voice_d); + + delete[] w_lang_code; + delete[] w_reg_code; + + cpDataKeyAttribs->Release(); + } + cpVoiceToken->Release(); + } + cpEnum->Release(); + } + } + cpCategory->Release(); + } + return list; +} + +void TTSDriverSAPI::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int64_t p_utterance_id, bool p_interrupt) { + ERR_FAIL_NULL(synth); + if (p_interrupt) { + stop(); + } + + if (p_text.is_empty()) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, p_utterance_id); + return; + } + + TTSUtterance message; + message.text = p_text; + message.voice = p_voice; + message.volume = 
CLAMP(p_volume, 0, 100); + message.pitch = CLAMP(p_pitch, 0.f, 2.f); + message.rate = CLAMP(p_rate, 0.1f, 10.f); + message.id = p_utterance_id; + queue.push_back(message); + + if (is_paused()) { + resume(); + } else { + update_requested = true; + } +} + +void TTSDriverSAPI::pause() { + ERR_FAIL_NULL(synth); + if (!paused) { + if (synth->Pause() == S_OK) { + paused = true; + } + } +} + +void TTSDriverSAPI::resume() { + ERR_FAIL_NULL(synth); + synth->Resume(); + paused = false; +} + +void TTSDriverSAPI::stop() { + ERR_FAIL_NULL(synth); + + SPVOICESTATUS status; + synth->GetStatus(&status, nullptr); + uint32_t current_stream = (uint32_t)status.ulCurrentStream; + if (ids.has(current_stream)) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, ids[current_stream].id); + ids.erase(current_stream); + } + for (TTSUtterance &message : queue) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, message.id); + } + queue.clear(); + synth->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr); + synth->Resume(); + paused = false; +} + +bool TTSDriverSAPI::init() { + if (SUCCEEDED(CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL, IID_ISpVoice, (void **)&synth))) { + ULONGLONG event_mask = SPFEI(SPEI_END_INPUT_STREAM) | SPFEI(SPEI_START_INPUT_STREAM) | SPFEI(SPEI_WORD_BOUNDARY); + synth->SetInterest(event_mask, event_mask); + synth->SetNotifyCallbackFunction(&speech_event_callback, (WPARAM)(this), 0); + print_verbose("Text-to-Speech: SAPI initialized."); + return true; + } else { + print_verbose("Text-to-Speech: Cannot initialize SAPI driver!"); + return false; + } +} + +TTSDriverSAPI::TTSDriverSAPI() { + singleton = this; +} + +TTSDriverSAPI::~TTSDriverSAPI() { + if (synth) { + synth->Release(); + } + singleton = nullptr; +} diff --git a/platform/windows/tts_driver_sapi.h b/platform/windows/tts_driver_sapi.h new file mode 100644 index 000000000000..e40495e345d0 --- /dev/null +++ 
b/platform/windows/tts_driver_sapi.h @@ -0,0 +1,77 @@ +/**************************************************************************/ +/* tts_driver_sapi.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#pragma once + +#include "tts_driver.h" + +#include + +#include +#include +#include + +#include + +struct TTSUtterance; + +class TTSDriverSAPI : public TTSDriver { + List queue; + ISpVoice *synth = nullptr; + bool paused = false; + struct UTData { + Char16String string; + int offset; + int64_t id; + }; + HashMap ids; + bool update_requested = false; + + static void __stdcall speech_event_callback(WPARAM wParam, LPARAM lParam); + + static TTSDriverSAPI *singleton; + +public: + virtual bool is_speaking() const override; + virtual bool is_paused() const override; + virtual Array get_voices() const override; + + virtual void speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int64_t p_utterance_id = 0, bool p_interrupt = false) override; + virtual void pause() override; + virtual void resume() override; + virtual void stop() override; + + virtual void process_events() override; + + virtual bool init() override; + + TTSDriverSAPI(); + ~TTSDriverSAPI(); +}; diff --git a/platform/windows/tts_windows.cpp b/platform/windows/tts_windows.cpp index 15364a248e1b..6273483ff60d 100644 --- a/platform/windows/tts_windows.cpp +++ b/platform/windows/tts_windows.cpp @@ -30,245 +30,92 @@ #include "tts_windows.h" -#include "servers/display/display_server.h" +#include "tts_driver_sapi.h" -TTS_Windows *TTS_Windows::singleton = nullptr; - -void __stdcall TTS_Windows::speech_event_callback(WPARAM wParam, LPARAM lParam) { - TTS_Windows *tts = TTS_Windows::get_singleton(); - SPEVENT event; - while (tts->synth->GetEvents(1, &event, nullptr) == S_OK) { - uint32_t stream_num = (uint32_t)event.ulStreamNum; - if (tts->ids.has(stream_num)) { - if (event.eEventId == SPEI_START_INPUT_STREAM) { - DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_STARTED, tts->ids[stream_num].id); - } else if (event.eEventId == 
SPEI_END_INPUT_STREAM) { - DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_ENDED, tts->ids[stream_num].id); - tts->ids.erase(stream_num); - tts->update_requested = true; - } else if (event.eEventId == SPEI_WORD_BOUNDARY) { - const Char16String &string = tts->ids[stream_num].string; - int pos = 0; - for (int i = 0; i < MIN(event.lParam, string.length()); i++) { - char16_t c = string[i]; - if ((c & 0xfffffc00) == 0xd800) { - i++; - } - pos++; - } - DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_BOUNDARY, tts->ids[stream_num].id, pos - tts->ids[stream_num].offset); - } - } - } -} - -void TTS_Windows::process_events() { - if (update_requested && !paused && queue.size() > 0 && !is_speaking()) { - TTSUtterance &message = queue.front()->get(); - - String text; - DWORD flags = SPF_ASYNC | SPF_PURGEBEFORESPEAK | SPF_IS_XML; - String pitch_tag = String(""); - text = pitch_tag + message.text + String(""); - - IEnumSpObjectTokens *cpEnum; - ISpObjectToken *cpVoiceToken; - ULONG ulCount = 0; - ULONG stream_number = 0; - ISpObjectTokenCategory *cpCategory; - HRESULT hr = CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (void **)&cpCategory); - if (SUCCEEDED(hr)) { - hr = cpCategory->SetId(SPCAT_VOICES, false); - if (SUCCEEDED(hr)) { - hr = cpCategory->EnumTokens(nullptr, nullptr, &cpEnum); - if (SUCCEEDED(hr)) { - hr = cpEnum->GetCount(&ulCount); - while (SUCCEEDED(hr) && ulCount--) { - wchar_t *w_id = nullptr; - hr = cpEnum->Next(1, &cpVoiceToken, nullptr); - cpVoiceToken->GetId(&w_id); - if (String::utf16((const char16_t *)w_id) == message.voice) { - synth->SetVoice(cpVoiceToken); - cpVoiceToken->Release(); - break; - } - cpVoiceToken->Release(); - } - cpEnum->Release(); - } - } - cpCategory->Release(); - } - - UTData ut; - ut.string = text.utf16(); - ut.offset = pitch_tag.length(); // Subtract injected tag offset. 
- ut.id = message.id; +#ifdef WINRT_ENABLED +#include "tts_driver_onecore.h" +#endif - synth->SetVolume(message.volume); - synth->SetRate(10.f * std::log10(message.rate) / std::log10(3.f)); - synth->Speak((LPCWSTR)ut.string.get_data(), flags, &stream_number); - - ids[(uint32_t)stream_number] = ut; - - queue.pop_front(); +TTS_Windows *TTS_Windows::singleton = nullptr; - update_requested = false; - } +TTS_Windows *TTS_Windows::get_singleton() { + return singleton; } bool TTS_Windows::is_speaking() const { - ERR_FAIL_NULL_V(synth, false); - - SPVOICESTATUS status; - synth->GetStatus(&status, nullptr); - return (status.dwRunningState == SPRS_IS_SPEAKING || status.dwRunningState == 0 /* Waiting To Speak */); + if (driver) { + return driver->is_speaking(); + } + return false; } bool TTS_Windows::is_paused() const { - ERR_FAIL_NULL_V(synth, false); - return paused; + if (driver) { + return driver->is_paused(); + } + return false; } Array TTS_Windows::get_voices() const { - Array list; - IEnumSpObjectTokens *cpEnum; - ISpObjectToken *cpVoiceToken; - ISpDataKey *cpDataKeyAttribs; - ULONG ulCount = 0; - ISpObjectTokenCategory *cpCategory; - HRESULT hr = CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (void **)&cpCategory); - if (SUCCEEDED(hr)) { - hr = cpCategory->SetId(SPCAT_VOICES, false); - if (SUCCEEDED(hr)) { - hr = cpCategory->EnumTokens(nullptr, nullptr, &cpEnum); - if (SUCCEEDED(hr)) { - hr = cpEnum->GetCount(&ulCount); - while (SUCCEEDED(hr) && ulCount--) { - hr = cpEnum->Next(1, &cpVoiceToken, nullptr); - HRESULT hr_attr = cpVoiceToken->OpenKey(SPTOKENKEY_ATTRIBUTES, &cpDataKeyAttribs); - if (SUCCEEDED(hr_attr)) { - wchar_t *w_id = nullptr; - wchar_t *w_lang = nullptr; - wchar_t *w_name = nullptr; - cpVoiceToken->GetId(&w_id); - cpDataKeyAttribs->GetStringValue(L"Language", &w_lang); - cpDataKeyAttribs->GetStringValue(nullptr, &w_name); - LCID locale = wcstol(w_lang, nullptr, 16); - - int locale_chars = 
GetLocaleInfoW(locale, LOCALE_SISO639LANGNAME, nullptr, 0); - int region_chars = GetLocaleInfoW(locale, LOCALE_SISO3166CTRYNAME, nullptr, 0); - wchar_t *w_lang_code = new wchar_t[locale_chars]; - wchar_t *w_reg_code = new wchar_t[region_chars]; - GetLocaleInfoW(locale, LOCALE_SISO639LANGNAME, w_lang_code, locale_chars); - GetLocaleInfoW(locale, LOCALE_SISO3166CTRYNAME, w_reg_code, region_chars); - - Dictionary voice_d; - voice_d["id"] = String::utf16((const char16_t *)w_id); - if (w_name) { - voice_d["name"] = String::utf16((const char16_t *)w_name); - } else { - voice_d["name"] = voice_d["id"].operator String().replace("HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech\\Voices\\Tokens\\", ""); - } - voice_d["language"] = String::utf16((const char16_t *)w_lang_code) + "_" + String::utf16((const char16_t *)w_reg_code); - list.push_back(voice_d); - - delete[] w_lang_code; - delete[] w_reg_code; - - cpDataKeyAttribs->Release(); - } - cpVoiceToken->Release(); - } - cpEnum->Release(); - } - } - cpCategory->Release(); + if (driver) { + return driver->get_voices(); } - return list; + return Array(); } void TTS_Windows::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int64_t p_utterance_id, bool p_interrupt) { - ERR_FAIL_NULL(synth); - if (p_interrupt) { - stop(); - } - - if (p_text.is_empty()) { - DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, p_utterance_id); - return; - } - - TTSUtterance message; - message.text = p_text; - message.voice = p_voice; - message.volume = CLAMP(p_volume, 0, 100); - message.pitch = CLAMP(p_pitch, 0.f, 2.f); - message.rate = CLAMP(p_rate, 0.1f, 10.f); - message.id = p_utterance_id; - queue.push_back(message); - - if (is_paused()) { - resume(); - } else { - update_requested = true; + if (driver) { + driver->speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt); } } void TTS_Windows::pause() { - ERR_FAIL_NULL(synth); - if 
(!paused) {
-		if (synth->Pause() == S_OK) {
-			paused = true;
-		}
+	if (driver) {
+		driver->pause();
 	}
 }
 
 void TTS_Windows::resume() {
-	ERR_FAIL_NULL(synth);
-	synth->Resume();
-	paused = false;
+	if (driver) {
+		driver->resume();
+	}
 }
 
 void TTS_Windows::stop() {
-	ERR_FAIL_NULL(synth);
-
-	SPVOICESTATUS status;
-	synth->GetStatus(&status, nullptr);
-	uint32_t current_stream = (uint32_t)status.ulCurrentStream;
-	if (ids.has(current_stream)) {
-		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, ids[current_stream].id);
-		ids.erase(current_stream);
-	}
-	for (TTSUtterance &message : queue) {
-		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServerEnums::TTS_UTTERANCE_CANCELED, message.id);
+	if (driver) {
+		driver->stop();
 	}
-	queue.clear();
-	synth->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr);
-	synth->Resume();
-	paused = false;
 }
 
-TTS_Windows *TTS_Windows::get_singleton() {
-	return singleton;
+void TTS_Windows::process_events() {
+	if (driver) {
+		driver->process_events();
+	}
 }
 
+// Picks the first TTS driver whose init() succeeds: OneCore when WINRT_ENABLED is defined, then SAPI.
+// `driver` stays null when no driver initializes; every forwarding method checks for that.
 TTS_Windows::TTS_Windows() {
+	// Keep registering the instance: get_singleton() still returns `singleton`.
 	singleton = this;
 
-	if (SUCCEEDED(CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL, IID_ISpVoice, (void **)&synth))) {
-		ULONGLONG event_mask = SPFEI(SPEI_END_INPUT_STREAM) | SPFEI(SPEI_START_INPUT_STREAM) | SPFEI(SPEI_WORD_BOUNDARY);
-		synth->SetInterest(event_mask, event_mask);
-		synth->SetNotifyCallbackFunction(&speech_event_callback, (WPARAM)(this), 0);
-		print_verbose("Text-to-Speech: SAPI initialized.");
-	} else {
-		print_verbose("Text-to-Speech: Cannot initialize ISpVoice!");
+#ifdef WINRT_ENABLED
+	// Try OneCore driver.
+	if (!driver) {
+		driver = memnew(TTSDriverOneCore);
+		if (!driver->init()) {
+			memdelete(driver);
+			driver = nullptr;
+		}
+	}
+#endif
+	// Try SAPI driver.
+	if (!driver) {
+		driver = memnew(TTSDriverSAPI);
+		if (!driver->init()) {
+			memdelete(driver);
+			driver = nullptr;
+		}
 	}
 }
 
+// Destroys the active driver, if any, and unregisters the singleton.
 TTS_Windows::~TTS_Windows() {
-	if (synth) {
-		synth->Release();
+	if (driver) {
+		memdelete(driver);
 	}
 	singleton = nullptr;
 }
diff --git a/platform/windows/tts_windows.h b/platform/windows/tts_windows.h
index 5efef4c6e896..a2f7d9140410 100644
--- a/platform/windows/tts_windows.h
+++ b/platform/windows/tts_windows.h
@@ -31,31 +31,14 @@
 #pragma once
 
 #include "core/string/ustring.h"
-#include "core/templates/hash_map.h"
-#include "core/templates/list.h"
 #include "core/variant/array.h"
 
-#include
-
-#include
-#include
-#include
+class TTSDriver;
 
 struct TTSUtterance;
 
 class TTS_Windows {
-	List queue;
-	ISpVoice *synth = nullptr;
-	bool paused = false;
-	struct UTData {
-		Char16String string;
-		int offset;
-		int64_t id;
-	};
-	HashMap ids;
-	bool update_requested = false;
-
-	static void __stdcall speech_event_callback(WPARAM wParam, LPARAM lParam);
+	TTSDriver *driver = nullptr;
 
 	static TTS_Windows *singleton;