-
Notifications
You must be signed in to change notification settings - Fork 4.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Berry: add tensorflow lite for microcontrollers (#18119)
Co-authored-by: Christian Baars <[email protected]>
- Loading branch information
Showing
466 changed files
with
125,929 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
/******************************************************************** | ||
* Tasmota lib | ||
* | ||
* To use: import TFL | ||
*******************************************************************/ | ||
#include "be_constobj.h" | ||
#include "be_mapping.h" | ||
|
||
#ifdef USE_BERRY_TF_LITE | ||
|
||
|
||
extern const char* be_TFL_log(struct bvm *vm); | ||
BE_FUNC_CTYPE_DECLARE(be_TFL_log, "s", "@"); | ||
|
||
extern const char* be_TFL_stats(struct bvm *vm); | ||
BE_FUNC_CTYPE_DECLARE(be_TFL_stats, "s", "@"); | ||
|
||
extern bbool be_TFL_begin(struct bvm *vm, const char* type, const uint8_t *descriptor, size_t size); | ||
BE_FUNC_CTYPE_DECLARE(be_TFL_begin, "b", "@s[(bytes)~]"); | ||
|
||
extern bbool be_TFL_load(struct bvm *vm, const uint8_t *model_buf, size_t model_size, const uint8_t *output_buf, size_t output_size,int arena); | ||
BE_FUNC_CTYPE_DECLARE(be_TFL_load, "b", "@(bytes)~(bytes)~[i]"); | ||
|
||
extern bbool be_TFL_input(struct bvm *vm, const uint8_t *buf, size_t size); | ||
BE_FUNC_CTYPE_DECLARE(be_TFL_input, "b", "@(bytes)~"); | ||
|
||
extern bbool be_TFL_output(struct bvm *vm, const uint8_t *buf, size_t size); | ||
BE_FUNC_CTYPE_DECLARE(be_TFL_output, "b", "@(bytes)~"); | ||
|
||
extern void be_TFL_rec(struct bvm *vm, const char* filename, size_t seconds); | ||
BE_FUNC_CTYPE_DECLARE(be_TFL_rec, "", "@si"); | ||
|
||
#include "be_fixed_TFL.h" | ||
|
||
/* @const_object_info_begin | ||
module TFL (scope: global) { | ||
begin, ctype_func(be_TFL_begin) | ||
load, ctype_func(be_TFL_load) | ||
input, ctype_func(be_TFL_input) | ||
output, ctype_func(be_TFL_output) | ||
log, ctype_func(be_TFL_log) | ||
stats, ctype_func(be_TFL_stats) | ||
rec, ctype_func(be_TFL_rec) | ||
} | ||
@const_object_info_end */ | ||
|
||
#endif // USE_BERRY_TF_LITE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
name=MElFreqencyExtractor | ||
version=1.0 | ||
author=Christian Baars | ||
maintainer=Christian Baars | ||
sentence=Feature Extractor using mel frequencies | ||
paragraph=Uses ESP-DSP library. | ||
architectures=esp32 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,315 @@ | ||
/* | ||
mfcc.h - mel frequency extractor for ESP32 | ||
Computes features for slices of audio data, similar to speechpy | ||
This is intended to provide a stripped-down implementation that can work with Edge Impulse trained models | ||
based on: | ||
https://github.com/astorfi/speechpy | ||
https://github.com/AIWintermuteAI/Speech-to-Intent-Micro/blob/main/inference_code/Wio_Terminal/wio_speech_to_intent_150_10/mfcc.cpp | ||
Copyright (C) 2022 Christian Baars | ||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
|
||
|
||
#ifndef MELFREQUENCYEXTRACTOR_H | ||
#define MELFREQUENCYEXTRACTOR_H | ||
|
||
#include <Arduino.h> | ||
#include <string.h> | ||
#include "float.h" | ||
#include "esp_dsp.h" | ||
|
||
|
||
/**
 * Mel-frequency feature extractor that processes one audio frame per call.
 *
 * mfcc_compute() writes either the mel filterbank energies (when the object
 * was built with num_mfcc_features == 0) or cepstral coefficients.
 */
class MFCC {
  private:
    // Sizes fixed by the constructor.
    int num_mfcc_features;    // number of cepstral outputs; 0 selects filterbank-energy output
    int frame_len;            // samples read per mfcc_compute() call
    int frame_len_padded;     // frame_len rounded up to a power of two (FFT length)
    int num_bank_bins;        // number of mel filters

    // Heap buffers owned by this object.
    float *m_frame;           // frame_len_padded floats
    float *m_buffer;          // 2 * frame_len_padded floats (interleaved complex FFT data)
    float *m_mel_energies;    // num_bank_bins floats
    float *m_dct_matrix;      // assigned by the constructor only when num_mfcc_features != 0
    float *m_mel_fbank;       // num_bank_bins rows of (frame_len_padded/2 + 1) weights

    uint8_t m_amplification;  // multiplier applied to the raw samples
    float m_preemphasis;      // pre-emphasis coefficient; 0 disables the filter

    // Table builders used during construction.
    float *create_dct_matrix(int32_t input_length, int32_t coefficient_count);
    void create_mel_filterbank(int samp_freq, int low_freq, int high_freq);

    // Hertz -> mel.
    static inline float MelScale(float freq) {
      return 1127.0f * logf(1.0f + freq / 700.0f);
    }

    // Mel -> hertz (inverse of MelScale).
    static inline float InverseMelScale(float mel_freq) {
      return 700.0f * (expf(mel_freq / 1127.0f) - 1.0f);
    }

  public:
    MFCC(int num_mfcc_features, int frame_len, int num_bank_bins, int samp_freq, int low_freq, int high_freq);
    ~MFCC();
    void set_preamp(uint8_t amplification);
    void set_preemphasis(float preemphasis);
    void mfcc_compute(const int16_t *data, float *mfcc_out);
    void log10_normalize(float *out_buf, int out_buf_len, int noise_floor_db);
};
|
||
|
||
/**
 * Allocate all working buffers and precompute the lookup tables.
 *
 * @param num_mfcc_features  number of cepstral coefficients; 0 means that
 *                           mfcc_compute() outputs filterbank energies and
 *                           no DCT matrix is built
 * @param frame_len          samples per frame
 * @param num_bank_bins      number of mel filters
 * @param samp_freq          sampling rate passed to the filterbank builder
 * @param low_freq           lower filterbank edge in Hz
 * @param high_freq          upper filterbank edge in Hz
 */
MFCC::MFCC(int num_mfcc_features, int frame_len, int num_bank_bins, int samp_freq, int low_freq, int high_freq)
:num_mfcc_features(num_mfcc_features),
frame_len(frame_len),
num_bank_bins(num_bank_bins)
{
  // Round up to the next power of two with integer arithmetic only.
  // pow(2, ceil(log(n)/log(2))) can overshoot by a factor of two when the
  // floating-point quotient lands just above an exact integer.
  frame_len_padded = 1;
  while (frame_len_padded < frame_len && frame_len_padded < (1 << 30)) {
    frame_len_padded <<= 1;
  }

  // The destructor deletes both tables unconditionally, so they must never
  // hold an indeterminate value (the DCT matrix is skipped in MFE mode).
  m_dct_matrix = NULL;
  m_mel_fbank = NULL;

  m_frame = new float[frame_len_padded];
  m_buffer = new float[frame_len_padded * 2];
  m_mel_energies = new float[num_bank_bins];

  m_amplification = 1;
  m_preemphasis = 0.0;

  //create mel filterbank
  create_mel_filterbank(samp_freq, low_freq, high_freq);

  //create DCT matrix for mfcc mode
  if(num_mfcc_features != 0){
    m_dct_matrix = create_dct_matrix(num_bank_bins, num_mfcc_features);
  }

  //initialize FFT
  int ret = dsps_fft2r_init_fc32(NULL, frame_len_padded);
  if(ret==0){
    MicroPrintf("Framelength: %u, (rounded: %u)", frame_len,frame_len_padded);
  }
  else{
    MicroPrintf("dsps_fft2r_init_fc32 error: %d",ret);
  }
}
|
||
/**
 * Release every heap buffer owned by the extractor and shut down the FFT
 * backend that the constructor initialised.
 */
MFCC::~MFCC() {
  // per-frame scratch buffers
  delete[] m_frame;
  delete[] m_buffer;
  delete[] m_mel_energies;

  // lookup tables; the DCT matrix is deleted unconditionally, so this relies
  // on the pointer being either allocated or null at this point
  delete[] m_dct_matrix;
  delete[] m_mel_fbank;

  dsps_fft2r_deinit_fc32();
}
|
||
/**
 * Build a row-major DCT-II matrix on the heap.
 *
 * Entry (row, col) is sqrt(2/input_length) * cos(pi/input_length * (col + 0.5) * row).
 *
 * @param input_length       number of columns (length of the vector to transform)
 * @param coefficient_count  number of rows (output coefficients)
 * @return newly allocated array of input_length * coefficient_count floats;
 *         the caller owns it and must release it with delete[]
 */
float * MFCC::create_dct_matrix(int32_t input_length, int32_t coefficient_count) {
  float *matrix = new float[input_length*coefficient_count];

  const float scale = sqrt(2.0/(float)input_length);
  const double step = ((double)M_PI)/input_length;

  for (int32_t row = 0; row < coefficient_count; row++) {
    float *out = matrix + row*input_length;
    for (int32_t col = 0; col < input_length; col++) {
      out[col] = scale * cos( step * (col + 0.5) * row );
    }
  }
  return matrix;
}
|
||
|
||
void MFCC::create_mel_filterbank(int samp_freq, int low_freq, int high_freq) { | ||
// MicroPrintf("Create FB ..."); | ||
int coefficients = frame_len_padded/2 + 1; | ||
m_mel_fbank = new float[num_bank_bins * coefficients](); // zero-init | ||
uint32_t delta = (MelScale(high_freq) - MelScale(low_freq))/(num_bank_bins + 1); | ||
|
||
float mels[num_bank_bins+2]; | ||
float hertz[num_bank_bins+2]; | ||
int freq_index[num_bank_bins+2]; | ||
|
||
for (int i = 0;i<num_bank_bins+2;i++){ | ||
mels[i] = MelScale(low_freq) + (i * delta); | ||
hertz[i] = InverseMelScale(mels[i]); | ||
freq_index[i] = (coefficients + 1) * hertz[i] /samp_freq; | ||
} | ||
|
||
for (int i = 0;i<num_bank_bins;i++ ){ | ||
int left = int(freq_index[i]); | ||
int middle = int(freq_index[i + 1]); | ||
int right = int(freq_index[i + 2]); | ||
float slope_up = 1/float(middle-left); | ||
float slope_down = 1/float(right-middle); | ||
// MicroPrintf("%u %u %u %f %f",left,middle,right,slope_up,slope_down); | ||
for (int j = 0;j<(right-left+1);j++){ | ||
if (j < middle-left + 1){ | ||
m_mel_fbank[(i*coefficients)+left+j] = j*slope_up; | ||
} | ||
else{ | ||
m_mel_fbank[(i*coefficients)+left+j] = m_mel_fbank[(i*coefficients)+left+j-1]-slope_down; | ||
} | ||
} | ||
} | ||
// MicroPrintf("%f %f %f %f %f %f %f %f ",m_mel_fbank[5],m_mel_fbank[6] ,m_mel_fbank[7] ,m_mel_fbank[8] ,m_mel_fbank[9] ,m_mel_fbank[10] ,m_mel_fbank[11] ,m_mel_fbank[12]); | ||
// MicroPrintf("FB done"); | ||
} | ||
|
||
|
||
/**
 * Convert energies to decibels in place and rescale them to the range 0..1.
 *
 * Each value becomes clamp((10*log10(v) - noise_floor_db) / (12 - noise_floor_db), 0, 1).
 * Values below 1e-30 are written as 0 without taking the logarithm.
 *
 * @param out_buf         buffer that is read and overwritten
 * @param out_buf_len     number of floats in out_buf
 * @param noise_floor_db  noise floor in dB
 */
void MFCC::log10_normalize(float* out_buf, int out_buf_len, int noise_floor_db) {
  const float noise = static_cast<float>(noise_floor_db * -1);
  const float noise_scale = 1.0f / (noise + 12.0f);

  // int index: matches the type of out_buf_len, so a negative length yields
  // no iterations instead of wrapping to a huge unsigned bound
  for (int ix = 0; ix < out_buf_len; ix++) {
    float f = out_buf[ix];
    if (f < 1e-30) {
      out_buf[ix] = 0;
      continue;   // only this element is skipped; the rest must still be normalized
    }
    f = 10.0f * log10(f); // scale by 10
    f += noise;
    f *= noise_scale;
    // clip to 0..1
    if (f < 0.0f) f = 0.0f;
    else if (f > 1.0f) f = 1.0f;
    out_buf[ix] = f;
  }
}
|
||
void MFCC::set_preamp(uint8_t amplification){ | ||
m_amplification = amplification; | ||
} | ||
|
||
void MFCC::set_preemphasis(float preemphasis){ | ||
m_preemphasis = preemphasis; | ||
// Speechpy computes this over the window of a sample, here we will compute only over the slize !! | ||
} | ||
|
||
|
||
void MFCC::mfcc_compute(const int16_t * audio_data, float* mfcc_out) { | ||
|
||
int32_t i, j, bin; | ||
int coefficients = frame_len_padded/2 + 1; | ||
int data_clipped = 0; | ||
int data_clipped_low = 0; | ||
float conv_factor = m_amplification; | ||
float clip_thres = 0.99f * (float)(1<<15); | ||
|
||
// MicroPrintf("%d %d %d %d %d %d %d %d",audio_data[0],audio_data[1] ,audio_data[2] ,audio_data[3] ,audio_data[4] ,audio_data[5] ,audio_data[6] ,audio_data[7]); | ||
|
||
//TensorFlow way of normalizing .wav data to (-1,1) for speechpy's MFE | ||
if(num_mfcc_features == 0){ | ||
conv_factor /= (float)(1<<15); | ||
clip_thres /= (float)(1<<15); | ||
} | ||
|
||
for (int i = 0; i < frame_len; i++) { | ||
m_buffer[i] = audio_data[i] * conv_factor; //mfe -1..1, mfcc int16_t as float, both with additional pre_amp factor | ||
} | ||
if(m_buffer[i]> clip_thres){ | ||
m_buffer[i] /= m_amplification; | ||
data_clipped++; | ||
} | ||
else if( m_buffer[i]< -clip_thres){ | ||
m_buffer[i] /= m_amplification; | ||
data_clipped_low++; | ||
} | ||
|
||
if(data_clipped>0) | ||
MicroPrintf("Clip: %d __ %d",data_clipped, data_clipped_low); | ||
|
||
// MicroPrintf("%f %f %f %f %f %f %f %f ",m_buffer[0],m_buffer[1] ,m_buffer[2] ,m_buffer[3] ,m_buffer[4] ,m_buffer[5] ,m_buffer[6] ,m_buffer[7]); | ||
|
||
//pre-emphasis | ||
if(m_preemphasis!=0.0){ | ||
m_frame[0] = m_buffer[0] - m_preemphasis * m_buffer[frame_len - 1]; // roll through the frame "back" to the end | ||
for (i = 1; i < frame_len; i++){ | ||
m_frame[i] = m_buffer[i] - m_preemphasis * m_buffer[i - 1]; | ||
} | ||
} | ||
else{ | ||
for (i = 1; i < frame_len; i++){ | ||
m_frame[i] = m_buffer[i]; | ||
} | ||
} | ||
|
||
// prepare buffer for FFT | ||
for (i = 0; i < frame_len_padded; i++) { | ||
m_buffer[i * 2] = i<frame_len ? m_frame[i] : 0; | ||
// m_buffer[i * 2] = i<frame_len ? frame[i] * window_func[i] : 0; // in case we want to use a window function | ||
m_buffer[i*2 + 1] = 0; | ||
} | ||
// MicroPrintf("%f %f %f %f %f %f %f %f ",frame[0],frame[1] ,frame[2] ,frame[3] ,frame[4] ,frame[5] ,frame[6] ,frame[7]); | ||
|
||
//Compute FFT | ||
int err = dsps_fft2r_fc32(m_buffer, frame_len_padded); | ||
err += dsps_bit_rev_fc32(m_buffer, frame_len_padded); //Bit reverse | ||
// err += dsps_cplx2reC_fc32(m_buffer, frame_len_padded);// Complex spectrum in y_cf | ||
|
||
if(err!=0){ | ||
MicroPrintf("dsps_fft2r error: %u %f %f %f %f %f %f %f %f ",err,m_buffer[0],m_buffer[1] ,m_buffer[2] ,m_buffer[3] ,m_buffer[4] ,m_buffer[5] ,m_buffer[512] ,m_buffer[513]); | ||
} | ||
|
||
for (int i = 0 ; i < coefficients ; i++) { | ||
m_buffer[i] = (m_buffer[i*2] * m_buffer[i*2] + m_buffer[i*2 + 1] * m_buffer[i*2 + 1])/frame_len_padded; | ||
} | ||
// MicroPrintf(" pow spec: %f %f %f %f %f %f %f %f ",m_buffer[0],m_buffer[1] ,m_buffer[2] ,m_buffer[3] ,m_buffer[4] ,m_buffer[5] ,m_buffer[255] ,m_buffer[256]); | ||
|
||
//Apply mel filterbanks | ||
for (int i = 0;i<num_bank_bins;i++ ){ | ||
m_mel_energies[i] = 0; | ||
for (int j = 0;j<coefficients;j++ ){ | ||
m_mel_energies[i] += m_buffer[j] * m_mel_fbank[(i * coefficients) + j]; | ||
} | ||
} | ||
|
||
// for MFE copy and return - compute 10 * log10() later explicitely | ||
if(num_mfcc_features == 0){ | ||
for (bin = 0; bin < num_bank_bins; bin++){ | ||
mfcc_out[bin] = m_mel_energies[bin]; | ||
} | ||
// MicroPrintf("%u feat: %f %f %f %f %f %f %f %f ",num_bank_bins,mfcc_out[0],mfcc_out[1] ,mfcc_out[2] ,mfcc_out[3] ,mfcc_out[4] ,mfcc_out[5] ,mfcc_out[6] ,mfcc_out[7]); | ||
return; | ||
} | ||
|
||
// Continue for MFCC | ||
// Take log | ||
for (bin = 0; bin < num_bank_bins; bin++){ | ||
m_mel_energies[bin] = logf(m_mel_energies[bin]); | ||
} | ||
|
||
// Take DCT. Uses matrix mul. | ||
for (i = 1; i < num_mfcc_features; i++) { | ||
float sum = 0.0; | ||
for (j = 0; j < num_bank_bins; j++) { | ||
sum += m_dct_matrix[i*num_bank_bins+j] * m_mel_energies[j]; | ||
} | ||
mfcc_out[i] = sum; | ||
} | ||
|
||
// replace first cepstral coefficient with log of frame energy for DC elimination | ||
for (i=0; i<frame_len_padded; i++){ | ||
mfcc_out[0] += m_buffer[i]; | ||
} | ||
mfcc_out[0] = logf(mfcc_out[0]); | ||
} | ||
|
||
|
||
#endif //MELFREQUENCYEXTRACTOR_H |
Oops, something went wrong.