Merge branch 'ci/sync_gh_tflite-micro' into 'master'

Sync esp-tflite-micro from github - 592464 See merge request app-frameworks/esp-tflite-micro!123
espressif · Oct 26, 2023 · 9074e54 · 9074e54
2 parents d8c5d0f + 26122fb
commit 9074e54
Show file tree

Hide file tree

Showing 64 changed files with 3,001 additions and 3,841 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -24,16 +24,20 @@ file(GLOB srcs_kernels
           "${tfmicro_kernels_dir}/*.c"
           "${tfmicro_kernels_dir}/*.cc")
 
+file(GLOB signal_micro_kernels
+          "${signal_dir}/micro/kernels/*.c"
+          "${signal_dir}/micro/kernels/*.cc")
+
+file(GLOB signal_src
+          "${signal_dir}/src/*.c"
+          "${signal_dir}/src/*.cc")
+
 set(signal_srcs
-          "${signal_dir}/micro/kernels/rfft.cc"
-          "${signal_dir}/micro/kernels/window.cc"
+          "${signal_micro_kernels}"
+          "${signal_src}"
           "${signal_dir}/src/kiss_fft_wrappers/kiss_fft_float.cc"
           "${signal_dir}/src/kiss_fft_wrappers/kiss_fft_int16.cc"
-          "${signal_dir}/src/kiss_fft_wrappers/kiss_fft_int32.cc"
-          "${signal_dir}/src/rfft_float.cc"
-          "${signal_dir}/src/rfft_int16.cc"
-          "${signal_dir}/src/rfft_int32.cc"
-          "${signal_dir}/src/window.cc")
+          "${signal_dir}/src/kiss_fft_wrappers/kiss_fft_int32.cc")
 
 # remove sources which will be provided by esp_nn
 list(REMOVE_ITEM srcs_kernels

diff --git a/examples/micro_speech/CMakeLists.txt b/examples/micro_speech/CMakeLists.txt
@@ -1,3 +1,4 @@
 cmake_minimum_required(VERSION 3.5)
+set(EXTRA_COMPONENT_DIRS test_data)
 include($ENV{IDF_PATH}/tools/cmake/project.cmake)
 project(micro_speech)
diff --git a/examples/micro_speech/main/CMakeLists.txt b/examples/micro_speech/main/CMakeLists.txt
@@ -7,8 +7,8 @@ idf_component_register(
     SRCS main.cc main_functions.cc audio_provider.cc feature_provider.cc
          no_micro_features_data.cc yes_micro_features_data.cc
          model.cc recognize_commands.cc command_responder.cc
-         micro_features_generator.cc micro_model_settings.cc ringbuf.c
-    PRIV_REQUIRES spi_flash driver esp_timer
+         micro_features_generator.cc ringbuf.c
+    PRIV_REQUIRES spi_flash driver esp_timer test_data
     INCLUDE_DIRS "")
 
     # Reduce the level of paranoia to be able to compile sources

diff --git a/examples/micro_speech/main/audio_preprocessor_int8_model_data.h b/examples/micro_speech/main/audio_preprocessor_int8_model_data.h
diff --git a/examples/micro_speech/main/audio_provider.cc b/examples/micro_speech/main/audio_provider.cc
@@ -35,6 +35,7 @@ limitations under the License.
 
 using namespace std;
 
+// for c2 and c3, I2S support was added from IDF v4.4 onwards
 #define NO_I2S_SUPPORT CONFIG_IDF_TARGET_ESP32C2 || \
                           (CONFIG_IDF_TARGET_ESP32C3 \
                           && (ESP_IDF_VERSION < ESP_IDF_VERSION_VAL(4, 4, 0)))
@@ -47,23 +48,28 @@ volatile int32_t g_latest_audio_timestamp = 0;
  * each time , storing old data in the histrory buffer , {
  * history_samples_to_keep = 10 * 16 } */
 constexpr int32_t history_samples_to_keep =
-    ((kFeatureSliceDurationMs - kFeatureSliceStrideMs) *
+    ((kFeatureDurationMs - kFeatureStrideMs) *
      (kAudioSampleFrequency / 1000));
 /* new samples to get each time from ringbuffer, { new_samples_to_get =  20 * 16
  * } */
 constexpr int32_t new_samples_to_get =
-    (kFeatureSliceStrideMs * (kAudioSampleFrequency / 1000));
+    (kFeatureStrideMs * (kAudioSampleFrequency / 1000));
 
-const int32_t kAudioCaptureBufferSize = 80000;
+const int32_t kAudioCaptureBufferSize = 40000;
 const int32_t i2s_bytes_to_read = 3200;
 
 namespace {
-int16_t g_audio_output_buffer[kMaxAudioSampleSize];
+int16_t g_audio_output_buffer[kMaxAudioSampleSize * 32];
 bool g_is_audio_initialized = false;
 int16_t g_history_buffer[history_samples_to_keep];
 
 #if !NO_I2S_SUPPORT
 uint8_t g_i2s_read_buffer[i2s_bytes_to_read] = {};
+#if CONFIG_IDF_TARGET_ESP32
+i2s_port_t i2s_port = I2S_NUM_1; // for esp32-eye
+#else
+i2s_port_t i2s_port = I2S_NUM_0; // for esp32-s3-eye
+#endif
 #endif
 }  // namespace
 
@@ -73,9 +79,9 @@ uint8_t g_i2s_read_buffer[i2s_bytes_to_read] = {};
 static void i2s_init(void) {
   // Start listening for audio: MONO @ 16KHz
   i2s_config_t i2s_config = {
-      .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_TX),
+      .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
       .sample_rate = 16000,
-      .bits_per_sample = (i2s_bits_per_sample_t)16,
+      .bits_per_sample = (i2s_bits_per_sample_t) 16,
       .channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
       .communication_format = I2S_COMM_FORMAT_I2S,
       .intr_alloc_flags = 0,
@@ -85,23 +91,34 @@ static void i2s_init(void) {
       .tx_desc_auto_clear = false,
       .fixed_mclk = -1,
   };
+#if CONFIG_IDF_TARGET_ESP32S3
+  i2s_pin_config_t pin_config = {
+      .bck_io_num = 41,    // IIS_SCLK
+      .ws_io_num = 42,     // IIS_LCLK
+      .data_out_num = -1,  // IIS_DSIN
+      .data_in_num = 2,   // IIS_DOUT
+  };
+  i2s_config.bits_per_sample = (i2s_bits_per_sample_t) 32;
+#else
   i2s_pin_config_t pin_config = {
       .bck_io_num = 26,    // IIS_SCLK
       .ws_io_num = 32,     // IIS_LCLK
       .data_out_num = -1,  // IIS_DSIN
       .data_in_num = 33,   // IIS_DOUT
   };
+#endif
+
   esp_err_t ret = 0;
-  ret = i2s_driver_install((i2s_port_t)1, &i2s_config, 0, NULL);
+  ret = i2s_driver_install(i2s_port, &i2s_config, 0, NULL);
   if (ret != ESP_OK) {
     ESP_LOGE(TAG, "Error in i2s_driver_install");
   }
-  ret = i2s_set_pin((i2s_port_t)1, &pin_config);
+  ret = i2s_set_pin(i2s_port, &pin_config);
   if (ret != ESP_OK) {
     ESP_LOGE(TAG, "Error in i2s_set_pin");
   }
 
-  ret = i2s_zero_dma_buffer((i2s_port_t)1);
+  ret = i2s_zero_dma_buffer(i2s_port);
   if (ret != ESP_OK) {
     ESP_LOGE(TAG, "Error in initializing dma buffer with 0");
   }
@@ -111,23 +128,33 @@ static void i2s_init(void) {
 static void CaptureSamples(void* arg) {
 #if NO_I2S_SUPPORT
   ESP_LOGE(TAG, "i2s support not available on C3 chip for IDF < 4.4.0");
-  return;
 #else
   size_t bytes_read = i2s_bytes_to_read;
   i2s_init();
   while (1) {
     /* read 100ms data at once from i2s */
-    i2s_read((i2s_port_t)1, (void*)g_i2s_read_buffer, i2s_bytes_to_read,
-             &bytes_read, 10);
+    i2s_read(i2s_port, (void*)g_i2s_read_buffer, i2s_bytes_to_read,
+             &bytes_read, pdMS_TO_TICKS(100));
+
     if (bytes_read <= 0) {
       ESP_LOGE(TAG, "Error in I2S read : %d", bytes_read);
     } else {
       if (bytes_read < i2s_bytes_to_read) {
         ESP_LOGW(TAG, "Partial I2S read");
       }
+#if CONFIG_IDF_TARGET_ESP32S3
+      // rescale the data
+      for (int i = 0; i < bytes_read / 4; ++i) {
+        ((int16_t *) g_i2s_read_buffer)[i] = ((int32_t *) g_i2s_read_buffer)[i] >> 14;
+      }
+      bytes_read = bytes_read / 2;
+#endif
       /* write bytes read by i2s into ring buffer */
       int bytes_written = rb_write(g_audio_capture_buffer,
-                                   (uint8_t*)g_i2s_read_buffer, bytes_read, 10);
+                                   (uint8_t*)g_i2s_read_buffer, bytes_read, pdMS_TO_TICKS(100));
+      if (bytes_written != bytes_read) {
+        ESP_LOGI(TAG, "Could only write %d bytes out of %d", bytes_written, bytes_read);
+      }
       /* update the timestamp (in ms) to let the model know that new data has
        * arrived */
       g_latest_audio_timestamp = g_latest_audio_timestamp +
@@ -159,6 +186,26 @@ TfLiteStatus InitAudioRecording() {
   return kTfLiteOk;
 }
 
+TfLiteStatus GetAudioSamples1(int* audio_samples_size, int16_t** audio_samples)
+{
+  if (!g_is_audio_initialized) {
+    TfLiteStatus init_status = InitAudioRecording();
+    if (init_status != kTfLiteOk) {
+      return init_status;
+    }
+    g_is_audio_initialized = true;
+  }
+  int bytes_read =
+    rb_read(g_audio_capture_buffer, (uint8_t*)(g_audio_output_buffer), 16000, 1000);
+  if (bytes_read < 0) {
+    ESP_LOGI(TAG, "Couldn't read data in time");
+    bytes_read = 0;
+  }
+  *audio_samples_size = bytes_read;
+  *audio_samples = g_audio_output_buffer;
+  return kTfLiteOk;
+}
+
 TfLiteStatus GetAudioSamples(int start_ms, int duration_ms,
                              int* audio_samples_size, int16_t** audio_samples) {
   if (!g_is_audio_initialized) {
@@ -177,7 +224,7 @@ TfLiteStatus GetAudioSamples(int start_ms, int duration_ms,
   int bytes_read =
       rb_read(g_audio_capture_buffer,
               ((uint8_t*)(g_audio_output_buffer + history_samples_to_keep)),
-              new_samples_to_get * sizeof(int16_t), 10);
+              new_samples_to_get * sizeof(int16_t), pdMS_TO_TICKS(200));
   if (bytes_read < 0) {
     ESP_LOGE(TAG, " Model Could not read data from Ring Buffer");
   } else if (bytes_read < new_samples_to_get * sizeof(int16_t)) {

diff --git a/examples/micro_speech/main/audio_provider.h b/examples/micro_speech/main/audio_provider.h
@@ -31,6 +31,8 @@ limitations under the License.
 TfLiteStatus GetAudioSamples(int start_ms, int duration_ms,
                              int* audio_samples_size, int16_t** audio_samples);
 
+TfLiteStatus GetAudioSamples1(int* audio_samples_size, int16_t** audio_samples);
+
 // Returns the time that audio data was last captured in milliseconds. There's
 // no contract about what time zero represents, the accuracy, or the granularity
 // of the result. Subsequent calls will generally not return a lower value, but

diff --git a/examples/micro_speech/main/command_responder.cc b/examples/micro_speech/main/command_responder.cc
@@ -20,8 +20,8 @@ limitations under the License.
 // to the error console. Real applications will want to take some custom
 // action instead, and should implement their own versions of this function.
 void RespondToCommand(int32_t current_time, const char* found_command,
-                      uint8_t score, bool is_new_command) {
+                      float score, bool is_new_command) {
   if (is_new_command) {
-    MicroPrintf("Heard %s (%d) @%dms", found_command, score, current_time);
+    MicroPrintf("Heard %s (%.4f) @%dms", found_command, score, current_time);
   }
 }
diff --git a/examples/micro_speech/main/command_responder.h b/examples/micro_speech/main/command_responder.h
@@ -25,6 +25,6 @@ limitations under the License.
 // argument, `score` has the numerical confidence, and `is_new_command` is set
 // if the previous command was different to this one.
 void RespondToCommand(int32_t current_time, const char* found_command,
-                      uint8_t score, bool is_new_command);
+                      float score, bool is_new_command);
 
 #endif  // TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_COMMAND_RESPONDER_H_