Skip to content

Commit

Permalink
Addressed PR comments
Browse files Browse the repository at this point in the history
Signed-off-by: Ante Jukić <[email protected]>
  • Loading branch information
anteju committed Jun 30, 2024
1 parent 43aceb2 commit c79d0c6
Show file tree
Hide file tree
Showing 11 changed files with 541 additions and 555 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# Training the model
Basic run (on CPU for 50 epochs):
python examples/audio/speech_enhancement.py \
python examples/audio/audio_to_audio_train.py \
# (Optional: --config-path=<path to dir of configs> --config-name=<name of config without .yaml>) \
model.train_ds.manifest_filepath="<path to manifest file>" \
model.validation_ds.manifest_filepath="<path to manifest file>" \
Expand Down
2 changes: 1 addition & 1 deletion examples/audio/conf/predictive.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ model:
scale: ${model.encoder.scale}

estimator:
_target_: nemo.collections.audio.parts.submodules.diffusion.SpectrogramNoiseConditionalScoreNetworkPlusPlus
_target_: nemo.collections.audio.parts.submodules.ncsnpp.SpectrogramNoiseConditionalScoreNetworkPlusPlus
in_channels: 1 # single-channel noisy input
out_channels: 1 # single-channel estimate
num_res_blocks: 3 # increased number of res blocks
Expand Down
2 changes: 1 addition & 1 deletion examples/audio/conf/score_based_generative.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ model:
scale: ${model.encoder.scale}

estimator:
_target_: nemo.collections.audio.parts.submodules.diffusion.SpectrogramNoiseConditionalScoreNetworkPlusPlus
_target_: nemo.collections.audio.parts.submodules.ncsnpp.SpectrogramNoiseConditionalScoreNetworkPlusPlus
in_channels: 2 # concatenation of single-channel perturbed and noisy
out_channels: 1 # single-channel score estimate
conditioned_on_time: true
Expand Down
22 changes: 22 additions & 0 deletions nemo/collections/asr/parts/preprocessing/segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,28 @@ def select_channels(signal: npt.NDArray, channel_selector: Optional[ChannelSelec
return signal


def get_samples(audio_file: str, target_sr: int = 16000, dtype: str = 'float32'):
    """
    Read the samples from the given audio_file path. If the sampling rate of the file differs from
    `target_sr`, the signal is resampled to `target_sr` (16kHz unless specified otherwise).

    Args:
        audio_file (str):
            Path to the input audio file
        target_sr (int):
            Targeted sampling rate
        dtype (str):
            Data type passed to `SoundFile.read` for the returned samples
    Returns:
        samples (numpy.ndarray):
            Time-series sample data from the given audio file, with time as the last axis:
            [num_samples] for single-channel files, [num_channels, num_samples] for multi-channel files
    """
    with sf.SoundFile(audio_file, 'r') as f:
        orig_sr = f.samplerate
        # soundfile returns [num_samples] for mono and [num_samples, num_channels] for multi-channel audio
        samples = f.read(dtype=dtype)

    # Put time on the last axis BEFORE resampling: librosa resamples along the last axis by default,
    # so resampling the untransposed multi-channel array would resample across channels instead of time.
    # For 1-D (mono) signals the transpose is a no-op.
    samples = samples.transpose()

    if orig_sr != target_sr:
        samples = librosa.core.resample(samples, orig_sr=orig_sr, target_sr=target_sr)

    return samples


class AudioSegment(object):
"""Audio segment abstraction.
:param samples: Audio samples [num_samples x num_channels].
Expand Down
38 changes: 0 additions & 38 deletions nemo/collections/asr/parts/utils/audio_utils.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
CTCDecoding,
CTCDecodingConfig,
)
from nemo.collections.asr.parts.utils.audio_utils import get_samples
from nemo.collections.asr.parts.preprocessing.segment import get_samples
from nemo.collections.asr.parts.utils.speaker_utils import audio_rttm_map, get_uniqname_from_filepath
from nemo.collections.asr.parts.utils.streaming_utils import AudioFeatureIterator, FrameBatchASR
from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec
Expand Down
2 changes: 1 addition & 1 deletion nemo/collections/asr/parts/utils/streaming_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from nemo.collections.asr.models.ctc_bpe_models import EncDecCTCModelBPE
from nemo.collections.asr.parts.mixins.streaming import StreamingEncoder
from nemo.collections.asr.parts.preprocessing.features import normalize_batch
from nemo.collections.asr.parts.utils.audio_utils import get_samples
from nemo.collections.asr.parts.preprocessing.segment import get_samples
from nemo.core.classes import IterableDataset
from nemo.core.neural_types import LengthsType, MelSpectrogramType, NeuralType

Expand Down
Loading

0 comments on commit c79d0c6

Please sign in to comment.