diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 3535cfbe..5b5704dd 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -11,6 +11,12 @@ set(COMMON_SRC
 
 add_library(RnNoisePluginCommon STATIC ${COMMON_SRC})
 
+find_package(PkgConfig REQUIRED)
+pkg_check_modules(SAMPLERATE REQUIRED samplerate)
+target_link_libraries(RnNoisePluginCommon ${SAMPLERATE_LIBRARIES})
+target_include_directories(RnNoisePluginCommon PUBLIC ${SAMPLERATE_INCLUDE_DIRS})
+target_compile_options(RnNoisePluginCommon PUBLIC ${SAMPLERATE_CFLAGS_OTHER})
+
 target_link_libraries(RnNoisePluginCommon RnNoise)
 
 target_include_directories(RnNoisePluginCommon PUBLIC
diff --git a/src/common/include/common/RnNoiseCommonPlugin.h b/src/common/include/common/RnNoiseCommonPlugin.h
index 46c95fa7..cd52867e 100644
--- a/src/common/include/common/RnNoiseCommonPlugin.h
+++ b/src/common/include/common/RnNoiseCommonPlugin.h
@@ -3,30 +3,52 @@
 #include <memory>
 #include <vector>
 
+#include <samplerate.h>
+
 struct DenoiseState;
 
+// Denoises mono float audio with rnnoise, resampling to and from 48 kHz with
+// libsamplerate when the host sample rate differs.
 class RnNoiseCommonPlugin {
 public:
+    RnNoiseCommonPlugin();
+
+    // Sets the host sample rate; init() reads the ratios derived from it.
+    void setSampleRate(unsigned long sampleRate);
 
-    void init();
+    // Creates the resamplers and the denoiser; returns false on failure (see getError()).
+    bool init();
 
     void deinit();
 
+    // Most recent error message, or null if none has been recorded.
+    const char * getError() const
+    {
+        return m_errorStr;
+    }
+
     void process(const float *in, float *out, int32_t sampleFrames);
 
 private:
+    const char * m_errorStr = nullptr;
 
-    void createDenoiseState();
+    bool m_initialized = false;
+    bool m_resample = false;
 
-private:
     static const int k_denoiseFrameSize = 480;
     static const int k_denoiseSampleRate = 48000;
+    std::shared_ptr<SRC_STATE> m_srcIn;
+    std::shared_ptr<SRC_STATE> m_srcOut;
+    // Defaults keep the ratios defined even if setSampleRate() is never called.
+    double m_downRatio = 1.0;
+    double m_upRatio = 1.0;
 
     std::shared_ptr<DenoiseState> m_denoiseState;
 
-    std::vector<float> m_inputBuffer;
-    std::vector<float> m_outputBuffer;
-};
-
-
-
+    std::vector<float> m_inBuffer;
+    std::vector<float> m_outBuffer;
+    // m_outBuffer is used as a ring: read index, write index, frames available.
+    size_t m_outBufferR = 0;
+    size_t m_outBufferW = 0;
+    size_t m_outBufferA = 0;
+};
\ No newline at end of file
diff --git a/src/common/src/RnNoiseCommonPlugin.cpp b/src/common/src/RnNoiseCommonPlugin.cpp
index 9f9af9b0..8ac21a6f 100644
--- a/src/common/src/RnNoiseCommonPlugin.cpp
+++ b/src/common/src/RnNoiseCommonPlugin.cpp
@@ -7,83 +7,211 @@
 
 #include <rnnoise/rnnoise.h>
 
-void RnNoiseCommonPlugin::init() {
-    deinit();
-    createDenoiseState();
+RnNoiseCommonPlugin::RnNoiseCommonPlugin() :
+  m_errorStr   (nullptr),
+  m_initialized(false),
+  m_resample   (false)
+{
 }
 
-void RnNoiseCommonPlugin::deinit() {
-    m_denoiseState.reset();
+// Stores the host rate as conversion ratios to and from the 48 kHz denoiser rate.
+void RnNoiseCommonPlugin::setSampleRate(unsigned long sampleRate)
+{
+  m_downRatio = (double)k_denoiseSampleRate / (double)sampleRate;
+  m_upRatio   = (double)sampleRate / (double)k_denoiseSampleRate;
+  m_resample  = sampleRate != (unsigned long)k_denoiseSampleRate;
 }
 
-void RnNoiseCommonPlugin::process(const float *in, float *out, int32_t sampleFrames) {
-    if (sampleFrames == 0) {
-        return;
-    }
-
-    if (!m_denoiseState) {
-        createDenoiseState();
-    }
+// (Re)creates both resamplers and the denoiser state. Returns false and
+// records the libsamplerate message when a resampler cannot be created.
+bool RnNoiseCommonPlugin::init() {
+  int err = 0;
 
-    // Good case, we can copy less data around and rnnoise lib is built for it
-    if (sampleFrames == k_denoiseFrameSize) {
-        m_inputBuffer.resize(sampleFrames);
+  if (m_initialized)
+    deinit();
 
-        for (size_t i = 0; i < sampleFrames; i++) {
-            m_inputBuffer[i] = in[i] * std::numeric_limits<short>::max();
-        }
+  m_srcIn = std::shared_ptr<SRC_STATE>(
+    src_new(SRC_SINC_BEST_QUALITY, 1, &err),
+    [](SRC_STATE *st)
+    {
+      src_delete(st);
+    }
+  );
+
+  if (err)
+  {
+    m_errorStr = src_strerror(err);
+    return false;
+  }
+
+  m_srcOut = std::shared_ptr<SRC_STATE>(
+    src_new(SRC_SINC_BEST_QUALITY, 1, &err),
+    [](SRC_STATE *st)
+    {
+      src_delete(st);
+    }
+  );
+
+  if (err)
+  {
+    m_srcIn.reset();
+    m_errorStr = src_strerror(err);
+    return false;
+  }
+
+  m_denoiseState = std::shared_ptr<DenoiseState>(
+    rnnoise_create(),
+    [](DenoiseState *st)
+    {
+      rnnoise_destroy(st);
+    }
+  );
 
-        rnnoise_process_frame(m_denoiseState.get(), out, &m_inputBuffer[0]);
+  src_set_ratio(m_srcIn .get(), m_downRatio);
+  src_set_ratio(m_srcOut.get(), m_upRatio  );
 
-        for (size_t i = 0; i < sampleFrames; i++) {
-            out[i] /= std::numeric_limits<short>::max();
-        }
-    } else {
-        m_inputBuffer.resize(m_inputBuffer.size() + sampleFrames);
+  m_inBuffer .resize(k_denoiseFrameSize);
+  m_outBuffer.resize(k_denoiseFrameSize * 2);
+  m_outBufferR = 0;
+  m_outBufferW = 0;
+  m_outBufferA = 0;
 
-        // From [-1.f,1.f] range to [min short, max short] range which rnnoise lib will understand
-        {
-            float *inputBufferWriteStart = (m_inputBuffer.end() - sampleFrames).base();
-            for (size_t i = 0; i < sampleFrames; i++) {
-                inputBufferWriteStart[i] = in[i] * std::numeric_limits<short>::max();
-            }
-        }
+  m_initialized = true;
+  m_errorStr    = nullptr;
+  return true;
+}
 
-        const size_t samplesToProcess = m_inputBuffer.size() / k_denoiseFrameSize;
-        const size_t framesToProcess = samplesToProcess * k_denoiseFrameSize;
+void RnNoiseCommonPlugin::deinit() {
+  m_denoiseState.reset();
+  m_srcIn       .reset();
+  m_srcOut      .reset();
+  m_initialized = false;
+}
 
-        m_outputBuffer.resize(m_outputBuffer.size() + framesToProcess);
+// Denoises sampleFrames mono samples from in into out. Input is resampled to
+// 48 kHz when needed, fed to rnnoise in 480-sample frames, and resampled back;
+// output frames that are not available yet are emitted as leading silence.
+void RnNoiseCommonPlugin::process(const float *in, float *out, int32_t sampleFrames)
+{
+  const float mul = 1.0f / std::numeric_limits<short>::max();
+  if (sampleFrames <= 0)
+    return;
+
+  if (!m_initialized && !init())
+  {
+    // nothing can be processed without the resamplers and denoiser: output silence
+    memset(out, 0, sampleFrames * sizeof(float));
+    return;
+  }
+
+  SRC_DATA srcIn = {};
+  srcIn.data_in       = in;
+  srcIn.input_frames  = sampleFrames;
+  srcIn.end_of_input  = 0;
+  srcIn.src_ratio     = m_downRatio;
+  srcIn.data_out      = &m_inBuffer[0];
+  srcIn.output_frames = m_inBuffer.size();
+
+  SRC_DATA srcOut = {};
+  srcOut.data_out      = out;
+  srcOut.output_frames = sampleFrames;
+  srcOut.end_of_input  = 0;
+  srcOut.src_ratio     = m_upRatio;
+
+  long frames = 0;
+  while(srcIn.input_frames)
+  {
+    if (m_resample)
+    {
+      // resample the samples and then scale them
+      const int err = src_process(m_srcIn.get(), &srcIn);
+      if (err)
+      {
+        // bail out on error rather than looping on input that is never consumed
+        m_errorStr = src_strerror(err);
+        break;
+      }
+      for(long i = 0; i < srcIn.output_frames_gen; ++i)
+        m_inBuffer[i] *= std::numeric_limits<short>::max();
+    }
+    else
+    {
+      // just copy the data and scale it
+      srcIn.input_frames_used = srcIn.input_frames;
+      if (srcIn.input_frames_used > srcIn.output_frames)
+        srcIn.input_frames_used = srcIn.output_frames;
+      srcIn.output_frames_gen = srcIn.input_frames_used;
+
+      // read through data_in, which advances on every pass, not the start of in
+      for(long i = 0; i < srcIn.output_frames_gen; ++i)
+        m_inBuffer[i] = srcIn.data_in[i] * std::numeric_limits<short>::max();
+    }
 
-        // Process input buffer by chunks of k_denoiseFrameSize, put result into out buffer to return into range [-1.f,1.f]
+    srcIn.data_in      += srcIn.input_frames_used;
+    srcIn.input_frames -= srcIn.input_frames_used;
+
+    float *denoise_in = &m_inBuffer[0];
+    while(srcIn.output_frames_gen)
+    {
+      const int wrote = rnnoise_add_samples(m_denoiseState.get(), denoise_in, srcIn.output_frames_gen);
+      denoise_in += wrote;
+      srcIn.output_frames_gen -= wrote;
+
+      if (rnnoise_get_needed(m_denoiseState.get()) == 0)
+      {
+        rnnoise_process_frame(m_denoiseState.get(), &m_outBuffer[m_outBufferW]);
+
+        // scale the levels back to normal
+        for(int32_t i = 0; i < k_denoiseFrameSize; ++i)
+          m_outBuffer[m_outBufferW + i] *= mul;
+
+        m_outBufferW += k_denoiseFrameSize;
+        m_outBufferA += k_denoiseFrameSize;
+        if (m_outBufferW == m_outBuffer.size())
+          m_outBufferW = 0;
+      }
+
+      // resample what we can to the output
+      while(m_outBufferA && srcOut.output_frames)
+      {
+        // W <= R with frames available means the data wraps (or the ring is full)
+        srcOut.data_in      = &m_outBuffer[m_outBufferR];
+        srcOut.input_frames = m_outBufferW <= m_outBufferR ? m_outBuffer.size() - m_outBufferR : m_outBufferW - m_outBufferR;
+
+        if (m_resample)
+        {
+          if (src_process(m_srcOut.get(), &srcOut) != 0)
+            break;
+        }
+        else
         {
-            float *outBufferWriteStart = (m_outputBuffer.end() - framesToProcess).base();
-
-            for (size_t i = 0; i < samplesToProcess; i++) {
-                float *currentOutBuffer = &outBufferWriteStart[i * k_denoiseFrameSize];
-                float *currentInBuffer = &m_inputBuffer[i * k_denoiseFrameSize];
-                rnnoise_process_frame(m_denoiseState.get(), currentOutBuffer, currentInBuffer);
-
-                for (size_t j = 0; j < k_denoiseFrameSize; j++) {
-                    currentOutBuffer[j] /= std::numeric_limits<short>::max();
-                }
-            }
+          // simply copy the buffer if we are not resampling
+          srcOut.input_frames_used = srcOut.input_frames;
+          if (srcOut.input_frames_used > srcOut.output_frames)
+            srcOut.input_frames_used = srcOut.output_frames;
+          srcOut.output_frames_gen = srcOut.input_frames_used;
+          memcpy(srcOut.data_out, srcOut.data_in, srcOut.input_frames_used * sizeof(float));
         }
 
-        const size_t toCopyIntoOutput = std::min(m_outputBuffer.size(), static_cast<size_t>(sampleFrames));
+        if (!srcOut.input_frames_used && !srcOut.output_frames_gen)
+          break;
 
-        std::memcpy(out, &m_outputBuffer[0], toCopyIntoOutput * sizeof(float));
+        m_outBufferR += srcOut.input_frames_used;
+        m_outBufferA -= srcOut.input_frames_used;
 
-        m_inputBuffer.erase(m_inputBuffer.begin(), m_inputBuffer.begin() + framesToProcess);
-        m_outputBuffer.erase(m_outputBuffer.begin(), m_outputBuffer.begin() + toCopyIntoOutput);
+        srcOut.data_out      += srcOut.output_frames_gen;
+        srcOut.output_frames -= srcOut.output_frames_gen;
+        frames               += srcOut.output_frames_gen;
 
-        if (toCopyIntoOutput < sampleFrames) {
-            std::fill(out + toCopyIntoOutput, out + sampleFrames, 0.f);
-        }
+        if (m_outBufferR == m_outBuffer.size())
+          m_outBufferR = 0;
+      }
     }
-}
-
-void RnNoiseCommonPlugin::createDenoiseState() {
-    m_denoiseState = std::shared_ptr<DenoiseState>(rnnoise_create(), [](DenoiseState *st) {
-        rnnoise_destroy(st);
-    });
+  }
+
+  // if we generated fewer frames than wanted, pad them across to the right
+  if (frames < sampleFrames)
+  {
+    const size_t pad = sampleFrames - frames;
+    memmove(out + pad, out, frames * sizeof(float));
+    memset(out, 0, pad * sizeof(float));
+  }
 }
\ No newline at end of file
diff --git a/src/ladspa_plugin/RnNoiseLadspaPlugin.h b/src/ladspa_plugin/RnNoiseLadspaPlugin.h
index 28a8e8d8..f0b6fb8a 100644
--- a/src/ladspa_plugin/RnNoiseLadspaPlugin.h
+++ b/src/ladspa_plugin/RnNoiseLadspaPlugin.h
@@ -32,8 +32,10 @@ struct RnNoiseMono {
             nullptr // implementation data
     };
 
-    RnNoiseMono() {
+    RnNoiseMono(sample_rate_t sr) {
+        // init() programs the resampler ratios, so the rate must be set first
+        m_rnNoisePlugin.setSampleRate(sr);
         m_rnNoisePlugin.init();
     }
 
     ~RnNoiseMono() {
@@ -82,9 +84,12 @@ struct RnNoiseStereo {
             nullptr // implementation data
     };
 
-    RnNoiseStereo() {
+    RnNoiseStereo(sample_rate_t sr) {
+        // init() programs the resampler ratios, so the rate must be set first
+        m_rnNoisePluginL.setSampleRate(sr);
+        m_rnNoisePluginR.setSampleRate(sr);
         m_rnNoisePluginL.init();
         m_rnNoisePluginR.init();
     }
 
     ~RnNoiseStereo() {
diff --git a/src/lv2_plugin/RnNoiseLv2Plugin.cpp b/src/lv2_plugin/RnNoiseLv2Plugin.cpp
index 7588652c..3ee04b7c 100644
--- a/src/lv2_plugin/RnNoiseLv2Plugin.cpp
+++ b/src/lv2_plugin/RnNoiseLv2Plugin.cpp
@@ -7,6 +7,7 @@ RnNoiseLv2Plugin::RnNoiseLv2Plugin(double sample_rate, const char *bundle_path,
     (*valid) = true;
 
     m_rnNoisePlugin = std::make_unique<RnNoiseCommonPlugin>();
+    m_rnNoisePlugin->setSampleRate(static_cast<unsigned long>(sample_rate));
 }
 
 
@@ -47,4 +48,4 @@ void RnNoiseLv2Plugin::deactivate() {
     PluginBase::deactivate();
 
     m_rnNoisePlugin->deinit();
-}
+}
\ No newline at end of file
diff --git a/src/rnnoise/include/rnnoise/rnnoise.h b/src/rnnoise/include/rnnoise/rnnoise.h
index 8e41cc51..002c87c2 100644
--- a/src/rnnoise/include/rnnoise/rnnoise.h
+++ b/src/rnnoise/include/rnnoise/rnnoise.h
@@ -52,7 +52,15 @@ RNNOISE_EXPORT DenoiseState *rnnoise_create();
 
 RNNOISE_EXPORT void rnnoise_destroy(DenoiseState *st);
 
-RNNOISE_EXPORT float rnnoise_process_frame(DenoiseState *st, float *out, const float *in);
+/* Number of samples still missing before a whole frame is buffered. */
+RNNOISE_EXPORT int rnnoise_get_needed(DenoiseState *st);
+
+/* Buffers up to in_len samples from in; returns how many were consumed. */
+RNNOISE_EXPORT int rnnoise_add_samples(DenoiseState *st, const float *in, int in_len);
+
+/* Processes the buffered frame (result in out) and resets the buffer position. */
+RNNOISE_EXPORT float rnnoise_process_frame(DenoiseState *st, float *out);
+
 
 #ifdef __cplusplus
 }
diff --git a/src/rnnoise/src/denoise.c b/src/rnnoise/src/denoise.c
index caf9a899..57e3e1d0 100644
--- a/src/rnnoise/src/denoise.c
+++ b/src/rnnoise/src/denoise.c
@@ -85,6 +85,9 @@ typedef struct {
 } CommonState;
 
 struct DenoiseState {
+  float input[FRAME_SIZE]; /* filtered samples buffered for the next frame */
+  int input_pos;           /* number of samples currently held in input */
+
   float analysis_mem[FRAME_SIZE];
   float cepstral_mem[CEPS_MEM][NB_BANDS];
   int memid;
@@ -469,11 +472,29 @@ void pitch_filter(kiss_fft_cpx *X, const kiss_fft_cpx *P, const float *Ex, const
   }
 }
 
-float rnnoise_process_frame(DenoiseState *st, float *out, const float *in) {
+/* Samples still missing before a whole frame is buffered. */
+int rnnoise_get_needed(DenoiseState *st) {
+  return FRAME_SIZE - st->input_pos;
+}
+
+/* Runs up to in_len samples through the input biquad into the frame buffer; returns the count consumed. */
+int rnnoise_add_samples(DenoiseState *st, const float *in, int in_len) {
+  static const float a_hp[2] = {-1.99599, 0.99600};
+  static const float b_hp[2] = {-2, 1};
+
+  const int needed = FRAME_SIZE - st->input_pos;
+  const int take = in_len < needed ? (in_len > 0 ? in_len : 0) : needed;
+
+  biquad(st->input + st->input_pos, st->mem_hp_x, in, b_hp, a_hp, take);
+  st->input_pos += take;
+
+  return take;
+}
+
+float rnnoise_process_frame(DenoiseState *st, float *out) {
   int i;
   kiss_fft_cpx X[FREQ_SIZE];
   kiss_fft_cpx P[WINDOW_SIZE];
-  float x[FRAME_SIZE];
   float Ex[NB_BANDS], Ep[NB_BANDS];
   float Exp[NB_BANDS];
   float features[NB_FEATURES];
@@ -481,10 +502,9 @@ float rnnoise_process_frame(DenoiseState *st, float *out, const float *in) {
   float gf[FREQ_SIZE]={1};
   float vad_prob = 0;
   int silence;
-  static const float a_hp[2] = {-1.99599, 0.99600};
-  static const float b_hp[2] = {-2, 1};
-  biquad(x, st->mem_hp_x, in, b_hp, a_hp, FRAME_SIZE);
-  silence = compute_frame_features(st, X, P, Ex, Ep, Exp, features, x);
+
+  silence = compute_frame_features(st, X, P, Ex, Ep, Exp, features, st->input);
+  st->input_pos = 0;
 
   if (!silence) {
     compute_rnn(&st->rnn, g, &vad_prob, features);