forked from rhasspy/piper-sample-generator
-
Notifications
You must be signed in to change notification settings - Fork 1
/
augment.py
88 lines (70 loc) · 2.55 KB
/
augment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env python3
import argparse
import audioop
import sys
import wave
from pathlib import Path
import numpy as np
from audiomentations import Compose, ApplyImpulseResponse, Gain
_DIR = Path(__file__).parent
def main() -> None:
    """Augment every WAV file in a directory and write the results elsewhere.

    Command-line arguments:
        input_dir: directory whose ``*.wav`` files are read.
        output_dir: directory the augmented files are written to; created
            if it does not exist.
        --sample-rate: optional output sample rate. When omitted, each
            output keeps the sample rate of its input file.

    Each file is passed through a ``Gain`` transform (-12 dB to 0 dB)
    followed by ``ApplyImpulseResponse`` using the WAV files found in the
    ``impulses`` directory next to this script. The path of every written
    file is printed to stdout.

    Raises:
        ValueError: if an input file is not 16-bit mono PCM.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_dir")
    parser.add_argument("output_dir")
    parser.add_argument("--sample-rate", type=int)
    args = parser.parse_args()

    impulses = list((_DIR / "impulses").glob("*.wav"))
    augment = Compose(
        transforms=[
            Gain(min_gain_in_db=-12, max_gain_in_db=0),
            ApplyImpulseResponse(impulses),
        ]
    )

    input_dir = Path(args.input_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    for input_wav in input_dir.glob("*.wav"):
        output_wav = output_dir / input_wav.relative_to(input_dir)
        output_wav.parent.mkdir(parents=True, exist_ok=True)

        # Read and validate the input completely before touching the output
        # path, so a bad input never leaves a truncated WAV file behind.
        with wave.open(str(input_wav), "rb") as input_wav_file:
            # Explicit checks instead of ``assert``: asserts vanish under -O.
            if input_wav_file.getsampwidth() != 2:
                raise ValueError(f"{input_wav}: expected 16-bit samples")
            if input_wav_file.getnchannels() != 1:
                raise ValueError(f"{input_wav}: expected mono audio")

            input_rate = input_wav_file.getframerate()
            # int16 PCM -> float32 scaled by 1/32767 for the augmenter.
            input_audio = (
                np.frombuffer(
                    input_wav_file.readframes(input_wav_file.getnframes()),
                    dtype=np.int16,
                ).astype(np.float32)
                / 32767.0
            )

        output_audio = augment(input_audio, sample_rate=input_rate)
        output_bytes = audio_float_to_int16(output_audio).tobytes()

        # Without --sample-rate, args.sample_rate is None: keep the input
        # rate and skip resampling (ratecv cannot take None as a rate).
        output_rate = args.sample_rate or input_rate
        if output_rate != input_rate:
            # NOTE: audioop is deprecated since Python 3.11 and removed in 3.13.
            output_bytes, _state = audioop.ratecv(
                output_bytes,
                2,  # sample width in bytes
                1,  # channels
                input_rate,
                output_rate,
                None,
            )

        with wave.open(str(output_wav), "wb") as output_wav_file:
            output_wav_file.setframerate(output_rate)
            output_wav_file.setsampwidth(2)
            output_wav_file.setnchannels(1)
            output_wav_file.writeframes(output_bytes)

        print(output_wav)
def audio_float_to_int16(
    audio: np.ndarray, max_wav_value: float = 32767.0
) -> np.ndarray:
    """Scale float audio by ``max_wav_value`` and convert it to int16.

    The signal is not peak-normalized: samples are multiplied by
    ``max_wav_value``, clipped to ``[-max_wav_value, max_wav_value]`` and
    then cast to int16.
    """
    limit = max_wav_value
    return np.clip(audio * limit, -limit, limit).astype("int16")
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()