-
Notifications
You must be signed in to change notification settings - Fork 0
/
speech.ts
executable file
·105 lines (88 loc) · 3.87 KB
/
speech.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
/* Voice API */
import path from "node:path";
import fs from "node:fs";
import childProcess from "node:child_process";
/* Open JTalk dictionary directory, keyed by the value of `process.platform`. */
const osDicMap: Record<string, string> = {
  linux: "/var/lib/mecab/dic/open-jtalk/naist-jdic",
  darwin: "/opt/homebrew/opt/open-jtalk/dic",
  win32: "C:/open_jtalk/dic",
};

/* Dictionary directory for the running platform; platforms without an entry use the Linux path. */
const platformDic = osDicMap[process.platform];
const dic = platformDic !== undefined ? platformDic : osDicMap.linux;
/** Text substitutions applied to input before synthesis: matched text → replacement. */
interface VoiceDictionary {
  [key: string]: string;
}

/**
 * Substitution table read once, at module load, from `dictionary.dic` in this
 * module's directory. Each line has the form `key:value`.
 *
 * - Both LF and CRLF line endings are accepted.
 * - Only the first `:` separates key from value, so values may contain colons.
 * - Blank lines, lines without a `:`, and lines with an empty key are skipped:
 *   an empty key would make the replacement pattern match the empty string,
 *   and a missing value would be substituted as the text "undefined".
 *
 * Loading throws if the file cannot be read.
 */
const dictionary: VoiceDictionary = fs
  .readFileSync(path.join(__dirname, "dictionary.dic"), "utf-8")
  .split(/\r?\n/)
  .reduce((entries: VoiceDictionary, line: string) => {
    const separatorIndex = line.indexOf(":");
    // -1: no separator on the line; 0: empty key. Neither is a usable entry.
    if (separatorIndex <= 0) return entries;
    entries[line.slice(0, separatorIndex)] = line.slice(separatorIndex + 1);
    return entries;
  }, {});
/*function englishToKatakana(text: string): Promise<string> {
return new Promise((resolve, reject) => {
fetch("https://www.sljfaq.org/cgi/e2k_ja.cgi?o=json&lang=ja&word=" + encodeURIComponent(text))
.then((response) => response.json())
.then((data) => {
try {
let outputText = text;
for (const wordObj of data.words) {
if (wordObj.j_pron_only) {
const regex = new RegExp("\\b" + wordObj.word + "\\b", "g");
outputText = outputText.replace(regex, wordObj.j_pron_only);
}
}
resolve(outputText);
} catch (error) {
resolve(text);
}
})
.catch((error) => {
reject(error);
});
});
}*/
/** Escapes regular-expression metacharacters so that `literal` matches itself verbatim. */
function escapeRegExp(literal: string): string {
  return literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/** Replaces every occurrence of a dictionary key in `text` with its dictionary value. */
function applyDictionary(text: string): string {
  const keys = Object.keys(dictionary)
    // An empty key would match at every position; a non-string value would be inserted as "undefined".
    .filter((key) => key !== "" && typeof dictionary[key] === "string")
    // Alternation takes the first alternative that matches, so longer keys must come first.
    .sort((a, b) => b.length - a.length);
  if (keys.length === 0) return text;
  const pattern = new RegExp(keys.map(escapeRegExp).join("|"), "g");
  return text.replace(pattern, (match) => dictionary[match] ?? match);
}

/** Splits text on "。", newline, "." and "," into trimmed, non-empty segments. */
function splitIntoSegments(text: string): string[] {
  return text
    .split(/。|\n|\.|,/)
    .map((segment) => segment.trim())
    .filter((segment) => segment !== "");
}

/**
 * Runs `file` with `args` without a shell, optionally writing `input` to its stdin.
 * Resolves when the process exits successfully; rejects with the `execFile` error otherwise.
 */
function execFilePromise(file: string, args: readonly string[], input?: string): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const child = childProcess.execFile(file, args, (error) => {
      if (error) return reject(error);
      resolve();
    });
    if (input !== undefined) {
      // A process that fails to start or exits early makes the stdin write fail;
      // that failure is already reported through the execFile callback.
      child.stdin?.on("error", () => {});
      child.stdin?.end(input);
    }
  });
}

/**
 * Synthesizes `text` with Open JTalk and writes the result to `${filepath}.wav`.
 *
 * The text is lower-cased, run through the substitution dictionary, split into
 * segments, synthesized one WAV per segment, and merged with ffmpeg. Text is sent
 * to `open_jtalk` on stdin and every command is started without a shell, so
 * neither the text nor the paths are interpreted as shell syntax.
 *
 * @param text Text to speak.
 * @param filepath Output path without extension; temporary files use it as a prefix.
 * @param model Voice model file passed to `open_jtalk -m`.
 * @param speed Value for `open_jtalk -r`; must be non-zero.
 * @param tone Value for `open_jtalk -fm`.
 * @param intonation Value for `open_jtalk -jf`.
 * @param volume Value for `open_jtalk -g`.
 * @param between Seconds of silence appended after each segment; values that are not greater than 0 add none.
 * @returns `${filepath}.wav`.
 * @throws Error if `speed` is falsy, if the text has nothing to synthesize, or if
 *   `open_jtalk` / `ffmpeg` fails. Temporary files are removed in every case.
 */
async function generateVoice(text:string, filepath:string, model:string, speed:number, tone:number, intonation:number, volume:number, between:number): Promise<string> {
  if (!speed) throw new Error("Speed is not defined");

  // English-to-katakana conversion is currently disabled; only the dictionary is applied.
  const segments = splitIntoSegments(applyDictionary(text.toLowerCase()));
  if (segments.length === 0) throw new Error("Text contains nothing to synthesize");

  const mergedFilePath = `${filepath}.wav`;
  const segmentPaths = segments.map((_segment, index) => path.join(`${filepath}_${index}.wav`));
  const insertSilence = between > 0;
  const blankPath = path.join(`${filepath}_blank_${between}.wav`);

  try {
    // Wait for every synthesis process to finish, even after a failure, so that
    // cleanup below never races with a process that is still writing its file.
    const outcomes = await Promise.allSettled(
      segments.map((segment, index) =>
        execFilePromise(
          "open_jtalk",
          [
            "-x", dic,
            "-m", model,
            "-r", String(speed),
            "-fm", String(tone),
            "-jf", String(intonation),
            "-g", String(volume),
            "-ow", segmentPaths[index],
          ],
          `${segment}\n`,
        ),
      ),
    );
    const failure = outcomes.find(
      (outcome): outcome is PromiseRejectedResult => outcome.status === "rejected",
    );
    if (failure) throw failure.reason;

    let inputs = segmentPaths;
    if (insertSilence) {
      // -y: overwrite a clip left behind by an earlier interrupted run instead of aborting.
      await execFilePromise("ffmpeg", [
        "-y",
        "-f", "lavfi",
        "-i", "anullsrc=r=48000:cl=mono",
        "-t", String(between),
        blankPath,
      ]);
      inputs = segmentPaths.flatMap((segmentPath) => [segmentPath, blankPath]);
    }

    await execFilePromise("ffmpeg", [
      "-y",
      ...inputs.flatMap((input) => ["-i", input]),
      "-filter_complex", `concat=n=${inputs.length}:v=0:a=1`,
      path.join(mergedFilePath),
    ]);
  } finally {
    const temporaryPaths = insertSilence ? [...segmentPaths, blankPath] : segmentPaths;
    await Promise.all(temporaryPaths.map((temporaryPath) => unlinkPromise(temporaryPath)));
  }

  return mergedFilePath;
}
/**
 * Runs `command` through `childProcess.exec` and settles once it has finished.
 * Resolves with no value on success; rejects with the error reported by `exec`.
 */
function execPromise(command: string): Promise<void> {
  return new Promise<void>((done, fail) => {
    childProcess.exec(command, (failure) => {
      if (failure) {
        fail(failure);
      } else {
        done();
      }
    });
  });
}
/**
 * Deletes `filePath` on a best-effort basis and never rejects.
 *
 * A file that does not exist counts as success. Any other failure is logged with
 * `console.error` and swallowed. The file is unlinked directly rather than checked
 * with `existsSync` first: the check blocks the event loop and the file can still
 * disappear between the check and the unlink.
 */
async function unlinkPromise(filePath: string): Promise<void> {
  try {
    await fs.promises.unlink(filePath);
  } catch (error: unknown) {
    const code = (error as { code?: unknown } | null)?.code;
    if (code !== "ENOENT") console.error(error);
  }
}
export { generateVoice };