Skip to content

Commit

Permalink
Berry: add tensorflow lite for microcontrollers (#18119)
Browse files Browse the repository at this point in the history
Co-authored-by: Christian Baars <[email protected]>
  • Loading branch information
Staars and Christian Baars authored Mar 5, 2023
1 parent 0f9bece commit 83f039c
Show file tree
Hide file tree
Showing 466 changed files with 125,929 additions and 0 deletions.
4 changes: 4 additions & 0 deletions lib/libesp32/berry/default/be_modtab.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ be_extern_native_module(partition_core);
be_extern_native_module(crc);
be_extern_native_module(crypto);
be_extern_native_module(ULP);
be_extern_native_module(TFL);
be_extern_native_module(mdns);
#ifdef USE_ZIGBEE
be_extern_native_module(zigbee);
Expand Down Expand Up @@ -171,6 +172,9 @@ BERRY_LOCAL const bntvmodule* const be_module_table[] = {
#if defined(USE_BERRY_ULP) && ((CONFIG_IDF_TARGET_ESP32) || defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3))
&be_native_module(ULP),
#endif // USE_BERRY_ULP
#if defined(USE_BERRY_TF_LITE)
&be_native_module(TFL),
#endif //USE_BERRY_TF_LITE
#if defined(USE_MI_ESP32) && !defined(USE_BLE_ESP32)
&be_native_module(MI32),
&be_native_module(BLE),
Expand Down
47 changes: 47 additions & 0 deletions lib/libesp32/berry_tasmota/src/be_TFL_lib.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/********************************************************************
* Tasmota lib
*
* To use: import TFL
*******************************************************************/
#include "be_constobj.h"
#include "be_mapping.h"

#ifdef USE_BERRY_TF_LITE

/*
 * Native entry points of the Berry `TFL` module.
 *
 * Every function below is only declared here (its definition lives in another
 * translation unit) and is then registered with BE_FUNC_CTYPE_DECLARE together
 * with two strings.
 * NOTE(review): judging from the matching C prototypes, the first string
 * presumably encodes the return type and the second the argument list
 * ("@" lining up with the `struct bvm *vm` parameter, "(bytes)~" with a
 * pointer + size pair, "[...]" with trailing optional arguments) - confirm
 * against be_mapping.h.
 */

/* TFL.log: returns a C string; takes only the VM pointer. */
extern const char* be_TFL_log(struct bvm *vm);
BE_FUNC_CTYPE_DECLARE(be_TFL_log, "s", "@");

/* TFL.stats: returns a C string; takes only the VM pointer. */
extern const char* be_TFL_stats(struct bvm *vm);
BE_FUNC_CTYPE_DECLARE(be_TFL_stats, "s", "@");

/* TFL.begin: takes a type string and a descriptor buffer with its size; returns a bool. */
extern bbool be_TFL_begin(struct bvm *vm, const char* type, const uint8_t *descriptor, size_t size);
BE_FUNC_CTYPE_DECLARE(be_TFL_begin, "b", "@s[(bytes)~]");

/* TFL.load: takes a model buffer, an output buffer (each with its size) and an `arena` integer; returns a bool. */
extern bbool be_TFL_load(struct bvm *vm, const uint8_t *model_buf, size_t model_size, const uint8_t *output_buf, size_t output_size,int arena);
BE_FUNC_CTYPE_DECLARE(be_TFL_load, "b", "@(bytes)~(bytes)~[i]");

/* TFL.input: takes a byte buffer and its size; returns a bool. */
extern bbool be_TFL_input(struct bvm *vm, const uint8_t *buf, size_t size);
BE_FUNC_CTYPE_DECLARE(be_TFL_input, "b", "@(bytes)~");

/* TFL.output: takes a byte buffer and its size; returns a bool. */
extern bbool be_TFL_output(struct bvm *vm, const uint8_t *buf, size_t size);
BE_FUNC_CTYPE_DECLARE(be_TFL_output, "b", "@(bytes)~");

/* TFL.rec: takes a file name and a duration in seconds; returns nothing. */
extern void be_TFL_rec(struct bvm *vm, const char* filename, size_t seconds);
BE_FUNC_CTYPE_DECLARE(be_TFL_rec, "", "@si");

/* Header that accompanies the module description below.
 * NOTE(review): presumably generated from the @const_object_info block by the
 * Berry build tooling - confirm before editing either side by hand. */
#include "be_fixed_TFL.h"

/* The following comment maps each Berry-visible member name of module `TFL`
 * to one of the C functions declared above. Keep its text unchanged unless the
 * module interface itself changes. */
/* @const_object_info_begin
module TFL (scope: global) {
begin, ctype_func(be_TFL_begin)
load, ctype_func(be_TFL_load)
input, ctype_func(be_TFL_input)
output, ctype_func(be_TFL_output)
log, ctype_func(be_TFL_log)
stats, ctype_func(be_TFL_stats)
rec, ctype_func(be_TFL_rec)
}
@const_object_info_end */

#endif // USE_BERRY_TF_LITE
7 changes: 7 additions & 0 deletions lib/libesp32_ml/mel_freq_extractor/library.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name=MElFreqencyExtractor
version=1.0
author=Christian Baars
maintainer=Christian Baars
sentence=Feature Extractor using mel frequencies
paragraph=Uses ESP-DSP library.
architectures=esp32
315 changes: 315 additions & 0 deletions lib/libesp32_ml/mel_freq_extractor/src/mfcc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,315 @@
/*
mfcc.h - mel frequency extractor for ESP32
Computes features for slices of audio data similar to speechpy
This is intended to provide a stripped down implementation that can work with Edge Impulse trained models
based on:
https://github.com/astorfi/speechpy
https://github.com/AIWintermuteAI/Speech-to-Intent-Micro/blob/main/inference_code/Wio_Terminal/wio_speech_to_intent_150_10/mfcc.cpp
Copyright (C) 2022 Christian Baars
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/



#ifndef MELFREQUENCYEXTRACTOR_H
#define MELFREQUENCYEXTRACTOR_H

#include <Arduino.h>
#include <string.h>
#include "float.h"
#include "esp_dsp.h"


/**
 * Mel-frequency feature extractor working on fixed-size frames of 16-bit audio.
 *
 * Depending on `num_mfcc_features` the instance produces either
 *   - MFE  (num_mfcc_features == 0): the raw mel filterbank energies, or
 *   - MFCC (num_mfcc_features != 0): cepstral coefficients obtained by a DCT
 *     over the log mel energies.
 *
 * The object owns several heap buffers that are released in the destructor,
 * therefore copying is disabled: a member-wise copy would make two objects
 * free the same buffers.
 */
class MFCC{
  private:
    int num_mfcc_features = 0;   // number of cepstral coefficients, 0 selects MFE mode
    int frame_len = 0;           // samples per input frame
    int frame_len_padded = 0;    // FFT length derived from frame_len (power of two)
    int num_bank_bins = 0;       // number of mel filters
    float * m_frame = nullptr;         // frame after pre-emphasis, frame_len_padded floats
    float * m_buffer = nullptr;        // scratch / complex FFT buffer, 2 * frame_len_padded floats
    float * m_mel_energies = nullptr;  // one energy per mel filter
    float * m_dct_matrix = nullptr;    // DCT table, only allocated in MFCC mode
    float * m_mel_fbank = nullptr;     // num_bank_bins rows of (frame_len_padded/2 + 1) filter weights

    uint8_t m_amplification = 1;   // integer gain multiplied into every input sample
    float m_preemphasis = 0.0f;    // pre-emphasis coefficient, 0 disables the filter

    // Allocates and fills the DCT table (coefficient_count rows of input_length values).
    float * create_dct_matrix(int32_t input_length, int32_t coefficient_count);
    // Allocates m_mel_fbank and fills it with triangular filters between low_freq and high_freq.
    void create_mel_filterbank(int samp_freq, int low_freq, int high_freq);

    // Converts a value on the mel scale back to a frequency in Hz.
    static inline float InverseMelScale(float mel_freq) {
      return 700.0f * (expf (mel_freq / 1127.0f) - 1.0f);
    }

    // Converts a frequency in Hz to the mel scale.
    static inline float MelScale(float freq) {
      return 1127.0f * logf (1.0f + freq / 700.0f);
    }


  public:
    MFCC(int num_mfcc_features, int frame_len,int num_bank_bins, int samp_freq, int low_freq, int high_freq);
    ~MFCC();

    // The instance owns raw buffers; forbid copies to prevent double frees.
    MFCC(const MFCC&) = delete;
    MFCC& operator=(const MFCC&) = delete;

    // Sets the integer gain applied to the samples before feature extraction.
    void set_preamp(uint8_t amplification);
    // Sets the pre-emphasis coefficient (0 turns pre-emphasis off).
    void set_preemphasis(float preemphasis);
    // Computes the features of one frame of frame_len samples into mfcc_out.
    void mfcc_compute(const int16_t* data, float* mfcc_out);
    // Converts energies in out_buf to a clipped, noise-floor-relative dB scale in [0,1].
    void log10_normalize(float* out_buf, int out_buf_len, int noise_floor_db);
};


/**
 * Creates the extractor and all lookup tables.
 *
 * @param num_mfcc_features number of cepstral coefficients; 0 selects MFE mode
 *                          (no DCT table is created)
 * @param frame_len         samples per frame handed to mfcc_compute()
 * @param num_bank_bins     number of mel filters
 * @param samp_freq         sample rate in Hz
 * @param low_freq          lower edge of the filterbank in Hz
 * @param high_freq         upper edge of the filterbank in Hz
 *
 * Every pointer member is initialised explicitly so that the destructor never
 * sees an indeterminate pointer (m_dct_matrix stays null in MFE mode).
 */
MFCC::MFCC(int num_mfcc_features, int frame_len, int num_bank_bins, int samp_freq, int low_freq, int high_freq)
:num_mfcc_features(num_mfcc_features),
 frame_len(frame_len),
 frame_len_padded(1),
 num_bank_bins(num_bank_bins),
 m_frame(nullptr),
 m_buffer(nullptr),
 m_mel_energies(nullptr),
 m_dct_matrix(nullptr),
 m_mel_fbank(nullptr),
 m_amplification(1),
 m_preemphasis(0.0f)
{
  // Round up to the nearest power of 2 with integer arithmetic: the previous
  // pow()/log() formulation depends on floating-point rounding and produced 0
  // (or an undefined conversion) for frame_len <= 0.
  while (frame_len_padded < frame_len && frame_len_padded < (1 << 30)) {
    frame_len_padded <<= 1;
  }

  m_frame = new float[frame_len_padded];
  m_buffer = new float[frame_len_padded * 2];   // interleaved real/imag FFT input
  m_mel_energies = new float[num_bank_bins];

  // A window function (e.g. dsps_wind_hann_f32) is intentionally not applied.

  //create mel filterbank (allocates m_mel_fbank, needs frame_len_padded)
  create_mel_filterbank(samp_freq, low_freq, high_freq);

  //create DCT matrix for mfcc mode
  if(num_mfcc_features != 0){
    m_dct_matrix = create_dct_matrix(num_bank_bins, num_mfcc_features);
  }

  //initialize FFT
  int ret = dsps_fft2r_init_fc32(NULL, frame_len_padded);
  if(ret==0){
    MicroPrintf("Framelength: %u, (rounded: %u)", frame_len,frame_len_padded);
  }
  else{
    MicroPrintf("dsps_fft2r_init_fc32 error: %d",ret);
  }

}

/**
 * Releases all buffers owned by the extractor and shuts down the FFT tables.
 */
MFCC::~MFCC() {
  delete[] m_frame;
  delete[] m_buffer;
  delete[] m_mel_energies;

  // The DCT table is only created by the constructor when
  // num_mfcc_features != 0; in MFE mode the pointer may never have been
  // assigned, so it must not be passed to delete[].
  if (num_mfcc_features != 0) {
    delete[] m_dct_matrix;
  }

  // The filterbank is one contiguous allocation, a single delete[] is enough.
  delete[] m_mel_fbank;
  dsps_fft2r_deinit_fc32();
}

/**
 * Allocates and fills a row-major cosine table with `coefficient_count` rows
 * of `input_length` entries each:
 *   table[row][col] = sqrt(2/input_length) * cos(pi/input_length * (col + 0.5) * row)
 *
 * @return pointer to the newly allocated table; the caller takes ownership and
 *         has to release it with delete[].
 */
float * MFCC::create_dct_matrix(int32_t input_length, int32_t coefficient_count) {
  float * const table = new float[input_length*coefficient_count];
  const float scale = sqrt(2.0/(float)input_length);

  // Walk the table linearly; this visits the same cells as row*input_length+col.
  float * cell = table;
  for (int32_t row = 0; row < coefficient_count; ++row) {
    for (int32_t col = 0; col < input_length; ++col) {
      const double angle = ((double)M_PI)/input_length * (col + 0.5) * row;
      *cell++ = scale * cos(angle);
    }
  }
  return table;
}


/**
 * Allocates m_mel_fbank and fills it with `num_bank_bins` triangular filters.
 *
 * The band edges are spaced evenly on the mel scale between low_freq and
 * high_freq, converted back to Hz and mapped to spectrum bins. Filter i rises
 * from 0 at edge i to 1 at edge i+1 and falls back to 0 at edge i+2. Each
 * filter occupies one row of (frame_len_padded/2 + 1) weights.
 *
 * Differences to the previous implementation:
 *  - the mel step is kept as float (it used to be truncated to an integer,
 *    which shifted every band edge),
 *  - coinciding edges no longer produce a division by zero / NaN weights,
 *  - bins outside the spectrum are skipped instead of written out of bounds,
 *  - no variable-length arrays are needed.
 *
 * @param samp_freq sample rate in Hz
 * @param low_freq  lower edge of the first filter in Hz
 * @param high_freq upper edge of the last filter in Hz
 */
void MFCC::create_mel_filterbank(int samp_freq, int low_freq, int high_freq) {
  const int coefficients = frame_len_padded/2 + 1;
  m_mel_fbank = new float[num_bank_bins * coefficients](); // zero-init

  const float mel_low = MelScale(low_freq);
  const float mel_step = (MelScale(high_freq) - mel_low)/(num_bank_bins + 1);

  // Spectrum bin of the n-th band edge (n = 0 .. num_bank_bins + 1).
  auto edge_bin = [mel_low, mel_step, coefficients, samp_freq](int n) -> int {
    const float hertz = InverseMelScale(mel_low + (n * mel_step));
    return static_cast<int>((coefficients + 1) * hertz / samp_freq);
  };

  int left = edge_bin(0);
  int middle = edge_bin(1);
  for (int i = 0; i < num_bank_bins; i++) {
    const int right = edge_bin(i + 2);
    float * const row = m_mel_fbank + (i * coefficients);

    // Only touch bins that exist in this row.
    const int first = left < 0 ? 0 : left;
    const int last = right >= coefficients ? coefficients - 1 : right;

    for (int x = first; x <= last; x++) {
      float weight = 0.0f;
      if (x > left && x <= middle) {
        // rising edge, middle > left is guaranteed here
        weight = float(x - left) / float(middle - left);
      }
      else if (x >= middle && x < right) {
        // falling edge, right > middle is guaranteed here
        weight = float(right - x) / float(right - middle);
      }
      row[x] = weight;
    }

    // The next filter starts where this one peaked.
    left = middle;
    middle = right;
  }
}


/**
 * Converts energies to a noise-floor-relative, clipped dB scale in place.
 *
 * Each value f becomes clamp((10*log10(f) - noise_floor_db) / (12 - noise_floor_db), 0, 1).
 * Values below 1e-30 are mapped to 0 without taking the logarithm.
 *
 * The previous version returned from the function at the first such value and
 * left the remainder of the buffer un-normalized; now only that element is
 * skipped.
 *
 * @param out_buf        buffer of energies, overwritten with normalized values
 * @param out_buf_len    number of values in out_buf (non-positive: nothing is done)
 * @param noise_floor_db noise floor in dB
 */
void MFCC::log10_normalize(float* out_buf, int out_buf_len, int noise_floor_db) {
  const float noise = static_cast<float>(noise_floor_db * -1);
  const float noise_scale = 1.0f / (noise + 12.0f);

  // int index: a negative length must not turn into a huge unsigned bound
  for (int ix = 0; ix < out_buf_len; ix++) {
    float f = out_buf[ix];
    if (f < 1e-30) {
      out_buf[ix] = 0;
      continue;
    }
    f = 10.0f * log10f(f); // scale by 10
    f += noise;
    f *= noise_scale;
    // clip again
    if (f < 0.0f) f = 0.0f;
    else if (f > 1.0f) f = 1.0f;
    out_buf[ix] = f;
  }
}

void MFCC::set_preamp(uint8_t amplification){
m_amplification = amplification;
}

void MFCC::set_preemphasis(float preemphasis){
m_preemphasis = preemphasis;
// Speechpy computes this over the window of a sample, here we will compute only over the slize !!
}


void MFCC::mfcc_compute(const int16_t * audio_data, float* mfcc_out) {

int32_t i, j, bin;
int coefficients = frame_len_padded/2 + 1;
int data_clipped = 0;
int data_clipped_low = 0;
float conv_factor = m_amplification;
float clip_thres = 0.99f * (float)(1<<15);

// MicroPrintf("%d %d %d %d %d %d %d %d",audio_data[0],audio_data[1] ,audio_data[2] ,audio_data[3] ,audio_data[4] ,audio_data[5] ,audio_data[6] ,audio_data[7]);

//TensorFlow way of normalizing .wav data to (-1,1) for speechpy's MFE
if(num_mfcc_features == 0){
conv_factor /= (float)(1<<15);
clip_thres /= (float)(1<<15);
}

for (int i = 0; i < frame_len; i++) {
m_buffer[i] = audio_data[i] * conv_factor; //mfe -1..1, mfcc int16_t as float, both with additional pre_amp factor
}
if(m_buffer[i]> clip_thres){
m_buffer[i] /= m_amplification;
data_clipped++;
}
else if( m_buffer[i]< -clip_thres){
m_buffer[i] /= m_amplification;
data_clipped_low++;
}

if(data_clipped>0)
MicroPrintf("Clip: %d __ %d",data_clipped, data_clipped_low);

// MicroPrintf("%f %f %f %f %f %f %f %f ",m_buffer[0],m_buffer[1] ,m_buffer[2] ,m_buffer[3] ,m_buffer[4] ,m_buffer[5] ,m_buffer[6] ,m_buffer[7]);

//pre-emphasis
if(m_preemphasis!=0.0){
m_frame[0] = m_buffer[0] - m_preemphasis * m_buffer[frame_len - 1]; // roll through the frame "back" to the end
for (i = 1; i < frame_len; i++){
m_frame[i] = m_buffer[i] - m_preemphasis * m_buffer[i - 1];
}
}
else{
for (i = 1; i < frame_len; i++){
m_frame[i] = m_buffer[i];
}
}

// prepare buffer for FFT
for (i = 0; i < frame_len_padded; i++) {
m_buffer[i * 2] = i<frame_len ? m_frame[i] : 0;
// m_buffer[i * 2] = i<frame_len ? frame[i] * window_func[i] : 0; // in case we want to use a window function
m_buffer[i*2 + 1] = 0;
}
// MicroPrintf("%f %f %f %f %f %f %f %f ",frame[0],frame[1] ,frame[2] ,frame[3] ,frame[4] ,frame[5] ,frame[6] ,frame[7]);

//Compute FFT
int err = dsps_fft2r_fc32(m_buffer, frame_len_padded);
err += dsps_bit_rev_fc32(m_buffer, frame_len_padded); //Bit reverse
// err += dsps_cplx2reC_fc32(m_buffer, frame_len_padded);// Complex spectrum in y_cf

if(err!=0){
MicroPrintf("dsps_fft2r error: %u %f %f %f %f %f %f %f %f ",err,m_buffer[0],m_buffer[1] ,m_buffer[2] ,m_buffer[3] ,m_buffer[4] ,m_buffer[5] ,m_buffer[512] ,m_buffer[513]);
}

for (int i = 0 ; i < coefficients ; i++) {
m_buffer[i] = (m_buffer[i*2] * m_buffer[i*2] + m_buffer[i*2 + 1] * m_buffer[i*2 + 1])/frame_len_padded;
}
// MicroPrintf(" pow spec: %f %f %f %f %f %f %f %f ",m_buffer[0],m_buffer[1] ,m_buffer[2] ,m_buffer[3] ,m_buffer[4] ,m_buffer[5] ,m_buffer[255] ,m_buffer[256]);

//Apply mel filterbanks
for (int i = 0;i<num_bank_bins;i++ ){
m_mel_energies[i] = 0;
for (int j = 0;j<coefficients;j++ ){
m_mel_energies[i] += m_buffer[j] * m_mel_fbank[(i * coefficients) + j];
}
}

// for MFE copy and return - compute 10 * log10() later explicitely
if(num_mfcc_features == 0){
for (bin = 0; bin < num_bank_bins; bin++){
mfcc_out[bin] = m_mel_energies[bin];
}
// MicroPrintf("%u feat: %f %f %f %f %f %f %f %f ",num_bank_bins,mfcc_out[0],mfcc_out[1] ,mfcc_out[2] ,mfcc_out[3] ,mfcc_out[4] ,mfcc_out[5] ,mfcc_out[6] ,mfcc_out[7]);
return;
}

// Continue for MFCC
// Take log
for (bin = 0; bin < num_bank_bins; bin++){
m_mel_energies[bin] = logf(m_mel_energies[bin]);
}

// Take DCT. Uses matrix mul.
for (i = 1; i < num_mfcc_features; i++) {
float sum = 0.0;
for (j = 0; j < num_bank_bins; j++) {
sum += m_dct_matrix[i*num_bank_bins+j] * m_mel_energies[j];
}
mfcc_out[i] = sum;
}

// replace first cepstral coefficient with log of frame energy for DC elimination
for (i=0; i<frame_len_padded; i++){
mfcc_out[0] += m_buffer[i];
}
mfcc_out[0] = logf(mfcc_out[0]);
}


#endif //MELFREQUENCYEXTRACTOR_H
Loading

0 comments on commit 83f039c

Please sign in to comment.