diff --git a/paddleaudio/paddleaudio/compliance/__init__.py b/paddleaudio/paddleaudio/compliance/__init__.py index 97043fd7ba6..c08f9ab11ea 100644 --- a/paddleaudio/paddleaudio/compliance/__init__.py +++ b/paddleaudio/paddleaudio/compliance/__init__.py @@ -11,3 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from . import kaldi +from . import librosa diff --git a/paddleaudio/tests/features/__init__.py b/paddleaudio/tests/features/__init__.py new file mode 100644 index 00000000000..97043fd7ba6 --- /dev/null +++ b/paddleaudio/tests/features/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/paddleaudio/tests/features/base.py b/paddleaudio/tests/features/base.py new file mode 100644 index 00000000000..f3012e43183 --- /dev/null +++ b/paddleaudio/tests/features/base.py @@ -0,0 +1,47 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
class FeatTest(unittest.TestCase):
    """Shared fixture for the feature comparison tests.

    Subclasses provide their settings by overriding ``initParmas``; ``setUp``
    then loads the reference waveform and selects the paddle device.
    """

    def setUp(self):
        # Order matters only in that all three hooks run before each test.
        self.initParmas()
        self.initWavInput()
        self.setUpDevice()

    def setUpDevice(self, device='cpu'):
        """Select the paddle device for the test (``'cpu'`` by default)."""
        paddle.set_device(device)

    def initWavInput(self):
        """Load ``../wav/zh.wav`` (relative to this file) as a 2-D float32 array.

        Stores the samples in ``self.waveform`` and the second value returned
        by ``load`` in ``self.sr``.
        """
        wav_path = os.path.abspath(os.path.join(file_dir, '../wav/zh.wav'))
        samples, self.sr = load(wav_path)
        # paddlespeech.s2t.transform.spectrogram only supports float32
        self.waveform = samples.astype(np.float32)

        n_dims = len(self.waveform.shape)
        assert n_dims in (1, 2)
        if n_dims == 1:
            # Add a leading axis so the waveform is always 2-D.
            self.waveform = np.expand_dims(self.waveform, 0)

    def initParmas(self):
        """Hook that concrete test classes must override."""
        raise NotImplementedError
class ISTFT(FeatTest):
    """Check ``paddle.signal.istft`` against the paddlespeech.s2t ``IStft``."""

    def initParmas(self):
        """Set the STFT settings used by the forward and inverse transforms."""
        self.n_fft = 512
        self.hop_length = 128
        self.window_str = 'hann'

    def test_istft(self):
        # Forward STFT via paddlespeech; both inverse transforms start from
        # this same spectrum.
        ps_stft = Stft(self.n_fft, self.hop_length)
        spectrum = ps_stft(
            self.waveform.T).squeeze(1).T  # (n_fft//2 + 1, n_frames)
        x = paddle.to_tensor(spectrum)

        # Reference reconstruction with the paddlespeech transform.
        ps_istft = IStft(self.hop_length)
        ps_res = ps_istft(spectrum.T)

        # Reconstruction with paddle.signal.istft. Convert the result to a
        # NumPy array explicitly so both sides of the comparison are ndarrays
        # instead of relying on implicit Tensor-to-array conversion.
        window = get_window(
            self.window_str, self.n_fft, dtype=self.waveform.dtype)
        pd_res = paddle.signal.istft(
            x, self.n_fft, self.hop_length, window=window).numpy()

        np.testing.assert_array_almost_equal(ps_res, pd_res, decimal=5)
class LogMelSpect(FeatTest):
    """Compare paddleaudio's LogMelSpectrogram with the paddlespeech.s2t one."""

    def initParmas(self):
        """Set the FFT size, hop length and number of mel bins."""
        self.n_fft, self.hop_length = 512, 128
        self.n_mels = 40

    def test_log_melspect(self):
        # Reference result from the paddlespeech.s2t transform.
        reference = LogMelSpectrogram(self.sr, self.n_mels, self.n_fft,
                                      self.hop_length)
        ps_res = reference(self.waveform.T).squeeze(1).T

        wav_tensor = paddle.to_tensor(self.waveform)
        # The paddlespeech.s2t features mix up magnitude and power spectra,
        # hence power=1.0 here.
        extractor = paddleaudio.features.LogMelSpectrogram(
            self.sr,
            self.n_fft,
            self.hop_length,
            power=1.0,
            n_mels=self.n_mels,
            f_min=0.0)
        # NOTE(review): the division by 10 presumably undoes a dB scaling in
        # the paddleaudio output -- confirm against both implementations.
        scaled = extractor(wav_tensor) / 10.0
        pa_res = scaled.squeeze(0).numpy()

        np.testing.assert_array_almost_equal(ps_res, pa_res, decimal=5)
class Spect(FeatTest):
    """Compare paddleaudio's Spectrogram with the paddlespeech.s2t one."""

    def initParmas(self):
        """Set the FFT size and hop length used by both extractors."""
        self.n_fft, self.hop_length = 512, 128

    def test_spectrogram(self):
        # Reference result from the paddlespeech.s2t transform.
        reference = Spectrogram(self.n_fft, self.hop_length)
        expected = reference(self.waveform.T).squeeze(1).T  # Magnitude

        # paddleaudio result; power=1.0 is used to match the reference,
        # which the original comment marks as a magnitude spectrum.
        wav_tensor = paddle.to_tensor(self.waveform)
        extractor = paddleaudio.features.Spectrogram(
            self.n_fft, self.hop_length, power=1.0)
        actual = extractor(wav_tensor).squeeze(0).numpy()

        np.testing.assert_array_almost_equal(expected, actual, decimal=5)
class STFT(FeatTest):
    """Check ``paddle.signal.stft`` against the paddlespeech.s2t ``Stft``."""

    def initParmas(self):
        """Set the FFT size, hop length and window name."""
        self.n_fft, self.hop_length = 512, 128
        self.window_str = 'hann'

    def test_stft(self):
        # Reference result from the paddlespeech.s2t transform.
        reference = Stft(self.n_fft, self.hop_length)
        expected = reference(
            self.waveform.T).squeeze(1).T  # (n_fft//2 + 1, n_frames)

        # Same transform through paddle.signal.stft with an explicit window.
        wav_tensor = paddle.to_tensor(self.waveform)
        window = get_window(
            self.window_str, self.n_fft, dtype=wav_tensor.dtype)
        stft_out = paddle.signal.stft(
            wav_tensor, self.n_fft, self.hop_length, window=window)
        actual = stft_out.squeeze(0).numpy()

        np.testing.assert_array_almost_equal(expected, actual, decimal=5)