Skip to content

Commit

Permalink
Merge branch 'ci/sync_gh_tflite-micro' into 'master'
Browse files Browse the repository at this point in the history
Sync esp-tflite-micro from github - 592464

See merge request app-frameworks/esp-tflite-micro!123
  • Loading branch information
vikramdattu committed Oct 26, 2023
2 parents d8c5d0f + 26122fb commit 9074e54
Show file tree
Hide file tree
Showing 64 changed files with 3,001 additions and 3,841 deletions.
18 changes: 11 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,20 @@ file(GLOB srcs_kernels
"${tfmicro_kernels_dir}/*.c"
"${tfmicro_kernels_dir}/*.cc")

file(GLOB signal_micro_kernels
"${signal_dir}/micro/kernels/*.c"
"${signal_dir}/micro/kernels/*.cc")

file(GLOB signal_src
"${signal_dir}/src/*.c"
"${signal_dir}/src/*.cc")

set(signal_srcs
"${signal_dir}/micro/kernels/rfft.cc"
"${signal_dir}/micro/kernels/window.cc"
"${signal_micro_kernels}"
"${signal_src}"
"${signal_dir}/src/kiss_fft_wrappers/kiss_fft_float.cc"
"${signal_dir}/src/kiss_fft_wrappers/kiss_fft_int16.cc"
"${signal_dir}/src/kiss_fft_wrappers/kiss_fft_int32.cc"
"${signal_dir}/src/rfft_float.cc"
"${signal_dir}/src/rfft_int16.cc"
"${signal_dir}/src/rfft_int32.cc"
"${signal_dir}/src/window.cc")
"${signal_dir}/src/kiss_fft_wrappers/kiss_fft_int32.cc")

# remove sources which will be provided by esp_nn
list(REMOVE_ITEM srcs_kernels
Expand Down
1 change: 1 addition & 0 deletions examples/micro_speech/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Top-level project file for the micro_speech example.
cmake_minimum_required(VERSION 3.5)
# Register the local test_data directory as an additional component directory.
# NOTE(review): presumably this is what lets the main component list test_data
# in its PRIV_REQUIRES - confirm against main/CMakeLists.txt.
set(EXTRA_COMPONENT_DIRS test_data)
# Load the project build logic shipped with the ESP-IDF checkout that the
# IDF_PATH environment variable points to (read at configure time).
include($ENV{IDF_PATH}/tools/cmake/project.cmake)
project(micro_speech)
4 changes: 2 additions & 2 deletions examples/micro_speech/main/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ idf_component_register(
SRCS main.cc main_functions.cc audio_provider.cc feature_provider.cc
no_micro_features_data.cc yes_micro_features_data.cc
model.cc recognize_commands.cc command_responder.cc
micro_features_generator.cc micro_model_settings.cc ringbuf.c
PRIV_REQUIRES spi_flash driver esp_timer
micro_features_generator.cc ringbuf.c
PRIV_REQUIRES spi_flash driver esp_timer test_data
INCLUDE_DIRS "")

# Reduce the level of paranoia to be able to compile sources
Expand Down
746 changes: 746 additions & 0 deletions examples/micro_speech/main/audio_preprocessor_int8_model_data.h

Large diffs are not rendered by default.

75 changes: 61 additions & 14 deletions examples/micro_speech/main/audio_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ limitations under the License.

using namespace std;

// for c2 and c3, I2S support was added from IDF v4.4 onwards
//
// Evaluates to non-zero when the I2S capture path must be compiled out:
// always on ESP32-C2, and on ESP32-C3 when building against IDF < 4.4.0.
// The whole expansion is wrapped in parentheses so that `#if !NO_I2S_SUPPORT`
// negates the complete condition; without them the `!` binds only to the
// first operand and the C3 / old-IDF case wrongly enables the I2S code.
#define NO_I2S_SUPPORT                                      \
  (CONFIG_IDF_TARGET_ESP32C2 ||                             \
   (CONFIG_IDF_TARGET_ESP32C3 &&                            \
    (ESP_IDF_VERSION < ESP_IDF_VERSION_VAL(4, 4, 0))))
Expand All @@ -47,23 +48,28 @@ volatile int32_t g_latest_audio_timestamp = 0;
* each time, storing old data in the history buffer, {
* history_samples_to_keep = 10 * 16 } */
constexpr int32_t history_samples_to_keep =
((kFeatureSliceDurationMs - kFeatureSliceStrideMs) *
((kFeatureDurationMs - kFeatureStrideMs) *
(kAudioSampleFrequency / 1000));
/* new samples to get each time from ringbuffer, { new_samples_to_get = 20 * 16
* } */
constexpr int32_t new_samples_to_get =
(kFeatureSliceStrideMs * (kAudioSampleFrequency / 1000));
(kFeatureStrideMs * (kAudioSampleFrequency / 1000));

const int32_t kAudioCaptureBufferSize = 80000;
const int32_t kAudioCaptureBufferSize = 40000;
const int32_t i2s_bytes_to_read = 3200;

namespace {
int16_t g_audio_output_buffer[kMaxAudioSampleSize];
int16_t g_audio_output_buffer[kMaxAudioSampleSize * 32];
bool g_is_audio_initialized = false;
int16_t g_history_buffer[history_samples_to_keep];

#if !NO_I2S_SUPPORT
uint8_t g_i2s_read_buffer[i2s_bytes_to_read] = {};
#if CONFIG_IDF_TARGET_ESP32
i2s_port_t i2s_port = I2S_NUM_1; // for esp32-eye
#else
i2s_port_t i2s_port = I2S_NUM_0; // for esp32-s3-eye
#endif
#endif
} // namespace

Expand All @@ -73,9 +79,9 @@ uint8_t g_i2s_read_buffer[i2s_bytes_to_read] = {};
static void i2s_init(void) {
// Start listening for audio: MONO @ 16KHz
i2s_config_t i2s_config = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_TX),
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
.sample_rate = 16000,
.bits_per_sample = (i2s_bits_per_sample_t)16,
.bits_per_sample = (i2s_bits_per_sample_t) 16,
.channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
.communication_format = I2S_COMM_FORMAT_I2S,
.intr_alloc_flags = 0,
Expand All @@ -85,23 +91,34 @@ static void i2s_init(void) {
.tx_desc_auto_clear = false,
.fixed_mclk = -1,
};
#if CONFIG_IDF_TARGET_ESP32S3
i2s_pin_config_t pin_config = {
.bck_io_num = 41, // IIS_SCLK
.ws_io_num = 42, // IIS_LCLK
.data_out_num = -1, // IIS_DSIN
.data_in_num = 2, // IIS_DOUT
};
i2s_config.bits_per_sample = (i2s_bits_per_sample_t) 32;
#else
i2s_pin_config_t pin_config = {
.bck_io_num = 26, // IIS_SCLK
.ws_io_num = 32, // IIS_LCLK
.data_out_num = -1, // IIS_DSIN
.data_in_num = 33, // IIS_DOUT
};
#endif

esp_err_t ret = 0;
ret = i2s_driver_install((i2s_port_t)1, &i2s_config, 0, NULL);
ret = i2s_driver_install(i2s_port, &i2s_config, 0, NULL);
if (ret != ESP_OK) {
ESP_LOGE(TAG, "Error in i2s_driver_install");
}
ret = i2s_set_pin((i2s_port_t)1, &pin_config);
ret = i2s_set_pin(i2s_port, &pin_config);
if (ret != ESP_OK) {
ESP_LOGE(TAG, "Error in i2s_set_pin");
}

ret = i2s_zero_dma_buffer((i2s_port_t)1);
ret = i2s_zero_dma_buffer(i2s_port);
if (ret != ESP_OK) {
ESP_LOGE(TAG, "Error in initializing dma buffer with 0");
}
Expand All @@ -111,23 +128,33 @@ static void i2s_init(void) {
static void CaptureSamples(void* arg) {
#if NO_I2S_SUPPORT
ESP_LOGE(TAG, "i2s support not available on C3 chip for IDF < 4.4.0");
return;
#else
size_t bytes_read = i2s_bytes_to_read;
i2s_init();
while (1) {
/* read 100ms data at once from i2s */
i2s_read((i2s_port_t)1, (void*)g_i2s_read_buffer, i2s_bytes_to_read,
&bytes_read, 10);
i2s_read(i2s_port, (void*)g_i2s_read_buffer, i2s_bytes_to_read,
&bytes_read, pdMS_TO_TICKS(100));

if (bytes_read <= 0) {
ESP_LOGE(TAG, "Error in I2S read : %d", bytes_read);
} else {
if (bytes_read < i2s_bytes_to_read) {
ESP_LOGW(TAG, "Partial I2S read");
}
#if CONFIG_IDF_TARGET_ESP32S3
// rescale the data
for (int i = 0; i < bytes_read / 4; ++i) {
((int16_t *) g_i2s_read_buffer)[i] = ((int32_t *) g_i2s_read_buffer)[i] >> 14;
}
bytes_read = bytes_read / 2;
#endif
/* write bytes read by i2s into ring buffer */
int bytes_written = rb_write(g_audio_capture_buffer,
(uint8_t*)g_i2s_read_buffer, bytes_read, 10);
(uint8_t*)g_i2s_read_buffer, bytes_read, pdMS_TO_TICKS(100));
if (bytes_written != bytes_read) {
ESP_LOGI(TAG, "Could only write %d bytes out of %d", bytes_written, bytes_read);
}
/* update the timestamp (in ms) to let the model know that new data has
* arrived */
g_latest_audio_timestamp = g_latest_audio_timestamp +
Expand Down Expand Up @@ -159,6 +186,26 @@ TfLiteStatus InitAudioRecording() {
return kTfLiteOk;
}

// Hands the caller whatever audio is currently available in the capture ring
// buffer, copied into g_audio_output_buffer.
//
// On the first call the audio recording is initialized; if that fails, the
// failing status is returned and the output arguments are left untouched.
//
// audio_samples_size: receives the value returned by rb_read (clamped to 0 on
//                     a failed read).
//                     NOTE(review): the read is requested in bytes, so this
//                     looks like a byte count rather than a sample count -
//                     confirm against callers.
// audio_samples:      receives a pointer to g_audio_output_buffer.
TfLiteStatus GetAudioSamples1(int* audio_samples_size, int16_t** audio_samples)
{
  // One-time lazy start of the recording pipeline.
  if (!g_is_audio_initialized) {
    const TfLiteStatus status = InitAudioRecording();
    if (status != kTfLiteOk) {
      return status;
    }
    g_is_audio_initialized = true;
  }

  // Pull up to 16000 bytes out of the ring buffer, waiting up to 1000
  // (units as defined by rb_read) for data to arrive.
  uint8_t* const destination = (uint8_t*)(g_audio_output_buffer);
  int received = rb_read(g_audio_capture_buffer, destination, 16000, 1000);

  // A negative result is reported as "nothing read" rather than as an error.
  if (received < 0) {
    ESP_LOGI(TAG, "Couldn't read data in time");
    received = 0;
  }

  *audio_samples_size = received;
  *audio_samples = g_audio_output_buffer;
  return kTfLiteOk;
}

TfLiteStatus GetAudioSamples(int start_ms, int duration_ms,
int* audio_samples_size, int16_t** audio_samples) {
if (!g_is_audio_initialized) {
Expand All @@ -177,7 +224,7 @@ TfLiteStatus GetAudioSamples(int start_ms, int duration_ms,
int bytes_read =
rb_read(g_audio_capture_buffer,
((uint8_t*)(g_audio_output_buffer + history_samples_to_keep)),
new_samples_to_get * sizeof(int16_t), 10);
new_samples_to_get * sizeof(int16_t), pdMS_TO_TICKS(200));
if (bytes_read < 0) {
ESP_LOGE(TAG, " Model Could not read data from Ring Buffer");
} else if (bytes_read < new_samples_to_get * sizeof(int16_t)) {
Expand Down
2 changes: 2 additions & 0 deletions examples/micro_speech/main/audio_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ limitations under the License.
TfLiteStatus GetAudioSamples(int start_ms, int duration_ms,
int* audio_samples_size, int16_t** audio_samples);

TfLiteStatus GetAudioSamples1(int* audio_samples_size, int16_t** audio_samples);

// Returns the time that audio data was last captured in milliseconds. There's
// no contract about what time zero represents, the accuracy, or the granularity
// of the result. Subsequent calls will generally not return a lower value, but
Expand Down
4 changes: 2 additions & 2 deletions examples/micro_speech/main/command_responder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ limitations under the License.
// to the error console. Real applications will want to take some custom
// action instead, and should implement their own versions of this function.
void RespondToCommand(int32_t current_time, const char* found_command,
uint8_t score, bool is_new_command) {
float score, bool is_new_command) {
if (is_new_command) {
MicroPrintf("Heard %s (%d) @%dms", found_command, score, current_time);
MicroPrintf("Heard %s (%.4f) @%dms", found_command, score, current_time);
}
}
2 changes: 1 addition & 1 deletion examples/micro_speech/main/command_responder.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,6 @@ limitations under the License.
// argument, `score` has the numerical confidence, and `is_new_command` is set
// if the previous command was different to this one.
void RespondToCommand(int32_t current_time, const char* found_command,
uint8_t score, bool is_new_command);
float score, bool is_new_command);

#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_COMMAND_RESPONDER_H_
Loading

0 comments on commit 9074e54

Please sign in to comment.