Skip to content

Commit

Permalink
Merge pull request #62 from uezo/develop
Browse files Browse the repository at this point in the history
Make it possible to select TTS engine when you add voice
  • Loading branch information
uezo authored Jun 27, 2020
2 parents 20be5fa + 38fc5de commit 6588938
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 35 deletions.
8 changes: 4 additions & 4 deletions ChatdollKit/Scripts/Model/AnimatedVoice.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,19 @@ public AnimatedVoice(List<Voice> voices = null, Dictionary<string, List<Animatio
Faces = faces ?? new List<FaceExpression>();
}

// Appends a new Voice, built from the arguments exactly as given, to Voices.
// NOTE(review): this span is a rendered diff hunk, so the signature and the body line each appear twice;
// presumably the ttsOptions lines are the removed version and the ttsConfig lines the added one - confirm against the repository.
public void AddVoice(string name, float preGap = 0.0f, float postGap = 0.0f, string text = null, string url = null, Dictionary<string, string> ttsOptions = null, VoiceSource source = VoiceSource.Local)
public void AddVoice(string name, float preGap = 0.0f, float postGap = 0.0f, string text = null, string url = null, TTSConfiguration ttsConfig = null, VoiceSource source = VoiceSource.Local)
{
Voices.Add(new Voice(name, preGap, postGap, text, url, ttsOptions, source));
Voices.Add(new Voice(name, preGap, postGap, text, url, ttsConfig, source));
}

// Appends a voice with source VoiceSource.Web to Voices.
// A null name is stored as an empty string; no TTS settings are attached (null is passed).
public void AddVoiceWeb(string url, float preGap = 0.0f, float postGap = 0.0f, string name = null, string text = null)
{
    var voiceName = name ?? string.Empty;
    var webVoice = new Voice(voiceName, preGap, postGap, text, url, null, VoiceSource.Web);
    Voices.Add(webVoice);
}

// Appends a voice with source VoiceSource.TTS to Voices; a null name is stored as an empty string
// and the url is always an empty string.
// NOTE(review): this span is a rendered diff hunk, so the signature and the body line each appear twice;
// presumably the ttsOptions lines are the removed version and the ttsConfig lines the added one - confirm against the repository.
public void AddVoiceTTS(string text, float preGap = 0.0f, float postGap = 0.0f, string name = null, Dictionary<string, string> ttsOptions = null)
public void AddVoiceTTS(string text, float preGap = 0.0f, float postGap = 0.0f, string name = null, TTSConfiguration ttsConfig = null)
{
Voices.Add(new Voice(name ?? string.Empty, preGap, postGap, text, string.Empty, ttsOptions, VoiceSource.TTS));
Voices.Add(new Voice(name ?? string.Empty, preGap, postGap, text, string.Empty, ttsConfig, VoiceSource.TTS));
}

public void AddAnimation(string name, string layerName = null, float duration = 0.0f, float fadeLength = -1.0f, float weight = 1.0f, float preGap = 0.0f, string description = null)
Expand Down
4 changes: 2 additions & 2 deletions ChatdollKit/Scripts/Model/AnimatedVoiceRequest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,13 @@ public void AddVoiceWeb(string url, float preGap = 0.0f, float postGap = 0.0f, s
AnimatedVoices.Last().AddVoiceWeb(url, preGap, postGap, name, text);
}

// Adds a TTS voice to the last frame in AnimatedVoices.
// A new frame is created first when asNewFrame is true or when there is no frame yet.
// NOTE(review): this span is a rendered diff hunk, so the signature and the final call each appear twice;
// presumably the ttsOptions lines are the removed version and the ttsConfig lines the added one - confirm against the repository.
public void AddVoiceTTS(string text, float preGap = 0.0f, float postGap = 0.0f, string name = null, Dictionary<string, string> ttsOptions = null, bool asNewFrame = false)
public void AddVoiceTTS(string text, float preGap = 0.0f, float postGap = 0.0f, string name = null, TTSConfiguration ttsConfig = null, bool asNewFrame = false)
{
if (asNewFrame || AnimatedVoices.Count == 0)
{
CreateNewFrame();
}
AnimatedVoices.Last().AddVoiceTTS(text, preGap, postGap, name, ttsOptions);
AnimatedVoices.Last().AddVoiceTTS(text, preGap, postGap, name, ttsConfig);
}

public void AddAnimation(string name, float duration = 0.0f, float fadeLength = -1.0f, float weight = 1.0f, float preGap = 0.0f, string description = null, bool asNewFrame = false)
Expand Down
24 changes: 23 additions & 1 deletion ChatdollKit/Scripts/Model/ModelController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public class ModelController : MonoBehaviour
// Audio clips keyed by name; AddVoice stores entries under ReplaceDakuten(name)
private Dictionary<string, AudioClip> voices = new Dictionary<string, AudioClip>();
public Func<Voice, Task<AudioClip>> VoiceDownloadFunc;
// Default TTS function: GetTTSFunction returns it when the requested name is empty or not registered
public Func<Voice, Task<AudioClip>> TextToSpeechFunc;
// TTS functions keyed by the name given to RegisterTTSFunction
public Dictionary<string, Func<Voice, Task<AudioClip>>> TextToSpeechFunctions = new Dictionary<string, Func<Voice, Task<AudioClip>>>();
public bool UsePrefetch = true;

// Animation
Expand Down Expand Up @@ -285,7 +286,8 @@ public async Task Say(VoiceRequest request, CancellationToken token)
}
else if (v.Source == VoiceSource.TTS)
{
clip = await TextToSpeechFunc?.Invoke(v);
var ttsFunc = GetTTSFunction(v.GetTTSFunctionName());
clip = await ttsFunc?.Invoke(v);
}

if (clip != null)
Expand Down Expand Up @@ -351,6 +353,26 @@ public void AddVoice(string name, AudioClip audioClip)
voices[ReplaceDakuten(name)] = audioClip;
}

// Resolve a TTS function by its registered name.
// Returns TextToSpeechFunc when the name is null/empty or nothing is registered under it.
public Func<Voice, Task<AudioClip>> GetTTSFunction(string name)
{
    if (string.IsNullOrEmpty(name))
    {
        return TextToSpeechFunc;
    }

    Func<Voice, Task<AudioClip>> registered;
    return TextToSpeechFunctions.TryGetValue(name, out registered) ? registered : TextToSpeechFunc;
}

// Store a TTS function under the given name, replacing any function already stored under it.
// When asDefault is true the function also becomes TextToSpeechFunc.
public void RegisterTTSFunction(string name, Func<Voice, Task<AudioClip>> func, bool asDefault = false)
{
    TextToSpeechFunctions[name] = func;

    if (!asDefault)
    {
        return;
    }

    TextToSpeechFunc = func;
}

// Replace Japanese Dakuten from resource files
public string ReplaceDakuten(string value)
{
Expand Down
53 changes: 47 additions & 6 deletions ChatdollKit/Scripts/Model/Voice.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,27 +15,68 @@ public class Voice
public float PostGap { get; set; }
public string Text { get; set; }
public string Url { get; set; }
// NOTE(review): rendered diff - TTSOptions and TTSConfig are presumably the removed and the added form of the same property; confirm against the repository.
public Dictionary<string, string> TTSOptions { get; set; }
// Per-voice TTS settings; read by GetTTSParam and GetTTSFunctionName
public TTSConfiguration TTSConfig { get; set; }
public VoiceSource Source { get; set; }

// Creates a voice by copying each argument into the corresponding property; no validation is performed.
// NOTE(review): this span is a rendered diff hunk, so the signature and the TTS assignment each appear twice;
// presumably the ttsOptions lines are the removed version and the ttsConfig lines the added one - confirm against the repository.
public Voice(string name, float preGap, float postGap, string text, string url, Dictionary<string, string> ttsOptions, VoiceSource source)
public Voice(string name, float preGap, float postGap, string text, string url, TTSConfiguration ttsConfig, VoiceSource source)
{
Name = name;
PreGap = preGap;
PostGap = postGap;
Text = text;
Url = url;
TTSOptions = ttsOptions;
TTSConfig = ttsConfig;
Source = source;
}

// Returns the TTS setting stored under key, or null when no TTS settings are attached to this voice.
// NOTE(review): this span is a rendered diff hunk; presumably the GetTTSOption/TTSOptions lines are the removed
// version (string values) and the GetTTSParam/TTSConfig lines the added one (object values) - confirm against the repository.
public string GetTTSOption(string key)
public object GetTTSParam(string key)
{
if (TTSOptions != null && TTSOptions.ContainsKey(key))
if (TTSConfig != null)
{
return TTSOptions[key];
// Delegates the lookup to TTSConfiguration.GetParam
return TTSConfig.GetParam(key);
}
return null;
}

// Name of the TTS function requested for this voice.
// Returns an empty string when no TTSConfig is attached; otherwise returns TTSConfig.TTSFunctionName as is.
public string GetTTSFunctionName()
{
    return TTSConfig == null ? string.Empty : TTSConfig.TTSFunctionName;
}
}

// Per-voice text-to-speech settings: the name of the TTS function to use plus free-form parameters.
public class TTSConfiguration
{
    // Name of the TTS function to use; both constructors initialise it to a non-null value
    public string TTSFunctionName { get; set; }

    // Free-form parameters keyed by name; the dictionary instance is created once per configuration
    public Dictionary<string, object> Params { get; }

    // Creates a configuration with an empty function name.
    // `new TTSConfiguration()` binds to this overload rather than to the optional-parameter one,
    // so it chains to the other constructor to get the same initial state.
    public TTSConfiguration() : this(null)
    {
    }

    // Creates a configuration for the named TTS function; a null name is stored as an empty string.
    public TTSConfiguration(string ttsFunctionName = null)
    {
        TTSFunctionName = ttsFunctionName ?? string.Empty;
        Params = new Dictionary<string, object>();
    }

    // Returns the value stored under key, or null when the key is null or not present.
    public object GetParam(string key)
    {
        // Dictionary lookups throw on a null key; treat it like any other missing parameter
        if (key == null)
        {
            return null;
        }

        object value;
        return Params.TryGetValue(key, out value) ? value : null;
    }
}
}
8 changes: 4 additions & 4 deletions ChatdollKit/Scripts/Model/VoiceRequest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,19 @@ public VoiceRequest(params string[] voiceNames) : this()
}
}

// Appends a new Voice, built from the arguments exactly as given, to Voices.
// NOTE(review): this span is a rendered diff hunk, so the signature and the body line each appear twice;
// presumably the ttsOptions lines are the removed version and the ttsConfig lines the added one - confirm against the repository.
public void AddVoice(string name, float preGap = 0.0f, float postGap = 0.0f, string text = null, string url = null, Dictionary<string, string> ttsOptions = null, VoiceSource source = VoiceSource.Local)
public void AddVoice(string name, float preGap = 0.0f, float postGap = 0.0f, string text = null, string url = null, TTSConfiguration ttsConfig = null, VoiceSource source = VoiceSource.Local)
{
Voices.Add(new Voice(name, preGap, postGap, text, url, ttsOptions, source));
Voices.Add(new Voice(name, preGap, postGap, text, url, ttsConfig, source));
}

// Appends a voice with source VoiceSource.Web to Voices.
// A null name is stored as an empty string; no TTS settings are attached (null is passed).
public void AddVoiceWeb(string url, float preGap = 0.0f, float postGap = 0.0f, string name = null, string text = null)
{
    var voiceName = name ?? string.Empty;
    var webVoice = new Voice(voiceName, preGap, postGap, text, url, null, VoiceSource.Web);
    Voices.Add(webVoice);
}

// Appends a voice with source VoiceSource.TTS to Voices; a null name is stored as an empty string
// and the url is always an empty string.
// NOTE(review): this span is a rendered diff hunk, so the signature and the body line each appear twice;
// presumably the ttsOptions lines are the removed version and the ttsConfig lines the added one - confirm against the repository.
public void AddVoiceTTS(string text, float preGap = 0.0f, float postGap = 0.0f, string name = null, Dictionary<string, string> ttsOptions = null)
public void AddVoiceTTS(string text, float preGap = 0.0f, float postGap = 0.0f, string name = null, TTSConfiguration ttsConfig = null)
{
Voices.Add(new Voice(name ?? string.Empty, preGap, postGap, text, string.Empty, ttsOptions, VoiceSource.TTS));
Voices.Add(new Voice(name ?? string.Empty, preGap, postGap, text, string.Empty, ttsConfig, VoiceSource.TTS));
}
}
}
6 changes: 3 additions & 3 deletions Extension/AzureTTSLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ protected override async Task<AudioClip> DownloadAudioClipAsync(Voice voice)
www.SetRequestHeader("Ocp-Apim-Subscription-Key", ApiKey);

// Body
var ttsLanguage = voice.GetTTSOption("language") ?? Language;
var ttsGender = voice.GetTTSOption("gender") ?? Gender;
var ttsSpeakerName = voice.GetTTSOption("speakerName") ?? SpeakerName;
var ttsLanguage = voice.GetTTSParam("language") as string ?? Language;
var ttsGender = voice.GetTTSParam("gender") as string ?? Gender;
var ttsSpeakerName = voice.GetTTSParam("speakerName") as string ?? SpeakerName;
var text = $"<speak version='1.0' xml:lang='{ttsLanguage}'><voice xml:lang='{ttsLanguage}' xml:gender='{ttsGender}' name='{ttsSpeakerName}'>{voice.Text}</voice></speak>";
www.uploadHandler = new UploadHandlerRaw(System.Text.Encoding.UTF8.GetBytes(text));

Expand Down
30 changes: 15 additions & 15 deletions Extension/VoiceroidTTSLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -67,35 +67,35 @@ class VoiceroidRequest
// Builds the request from a Voice: copies Text, reads the "Kana" setting, and adds each speaker setting
// (Volume, Speed, Pitch, Emphasis, PauseMiddle, PauseLong, PauseSentence) to Speaker only when it is present.
// NOTE(review): this span is a rendered diff hunk - every GetTTSOption line is immediately followed by its
// GetTTSParam counterpart; presumably these are removed/added pairs. Confirm against the repository.
public VoiceroidRequest(Voice voice)
{
Text = voice.Text;
Kana = voice.GetTTSOption("Kana");
// Direct cast: a null value stays null, a non-string value throws InvalidCastException
Kana = (string)voice.GetTTSParam("Kana");
Speaker = new Dictionary<string, float>();
if (!string.IsNullOrEmpty(voice.GetTTSOption("Volume")))
// GetTTSParam returns object, so the (float) casts below are unboxing conversions: they succeed only when
// the stored value is a boxed float (e.g. 1.0f) and throw InvalidCastException for a boxed int or double.
if (voice.GetTTSParam("Volume") != null)
{
Speaker["Volume"] = float.Parse(voice.GetTTSOption("Volume"));
Speaker["Volume"] = (float)voice.GetTTSParam("Volume");
}
if (!string.IsNullOrEmpty(voice.GetTTSOption("Speed")))
if (voice.GetTTSParam("Speed") != null)
{
Speaker["Speed"] = float.Parse(voice.GetTTSOption("Speed"));
Speaker["Speed"] = (float)voice.GetTTSParam("Speed");
}
if (!string.IsNullOrEmpty(voice.GetTTSOption("Pitch")))
if (voice.GetTTSParam("Pitch") != null)
{
Speaker["Pitch"] = float.Parse(voice.GetTTSOption("Pitch"));
Speaker["Pitch"] = (float)voice.GetTTSParam("Pitch");
}
if (!string.IsNullOrEmpty(voice.GetTTSOption("Emphasis")))
if (voice.GetTTSParam("Emphasis") != null)
{
Speaker["Emphasis"] = float.Parse(voice.GetTTSOption("Emphasis"));
Speaker["Emphasis"] = (float)voice.GetTTSParam("Emphasis");
}
if (!string.IsNullOrEmpty(voice.GetTTSOption("PauseMiddle")))
if (voice.GetTTSParam("PauseMiddle") != null)
{
Speaker["PauseMiddle"] = float.Parse(voice.GetTTSOption("PauseMiddle"));
Speaker["PauseMiddle"] = (float)voice.GetTTSParam("PauseMiddle");
}
if (!string.IsNullOrEmpty(voice.GetTTSOption("PauseLong")))
if (voice.GetTTSParam("PauseLong") != null)
{
Speaker["PauseLong"] = float.Parse(voice.GetTTSOption("PauseLong"));
Speaker["PauseLong"] = (float)voice.GetTTSParam("PauseLong");
}
if (!string.IsNullOrEmpty(voice.GetTTSOption("PauseSentence")))
if (voice.GetTTSParam("PauseSentence") != null)
{
Speaker["PauseSentence"] = float.Parse(voice.GetTTSOption("PauseSentence"));
Speaker["PauseSentence"] = (float)voice.GetTTSParam("PauseSentence");
}
}
}
Expand Down

0 comments on commit 6588938

Please sign in to comment.