ESP32 Wake-Up Flow
1. ESP32 starts up -> audio_service_.Initialize(codec);
void Application::Start() {
    auto& board = Board::GetInstance();
    SetDeviceState(kDeviceStateStarting);

    /* Setup the display */
    auto display = board.GetDisplay();

    /* Setup the audio service */
    auto codec = board.GetAudioCodec();
    audio_service_.Initialize(codec);
    audio_service_.Start();

    AudioServiceCallbacks callbacks;
    callbacks.on_send_queue_available = [this]() {
        xEventGroupSetBits(event_group_, MAIN_EVENT_SEND_AUDIO);
    };
    callbacks.on_wake_word_detected = [this](const std::string& wake_word) {
        xEventGroupSetBits(event_group_, MAIN_EVENT_WAKE_WORD_DETECTED);
    };
    callbacks.on_vad_change = [this](bool speaking) {
        xEventGroupSetBits(event_group_, MAIN_EVENT_VAD_CHANGE);
    };
    audio_service_.SetCallbacks(callbacks);

    // ......
    // ......
    // ......
}
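The callbacks above only set bits in event_group_; something on the main task has to consume them. Below is a rough sketch of that pattern, assuming a hypothetical MainEventLoop() method; only the event bits and the event_group_ member come from the code above, the handling itself is illustrative.

// Hypothetical sketch: consuming the event bits set by the AudioService callbacks.
// MainEventLoop() and the handling comments are assumptions, not the project's actual code.
void Application::MainEventLoop() {
    while (true) {
        auto bits = xEventGroupWaitBits(event_group_,
            MAIN_EVENT_SEND_AUDIO | MAIN_EVENT_WAKE_WORD_DETECTED | MAIN_EVENT_VAD_CHANGE,
            pdTRUE,   // clear the bits on exit
            pdFALSE,  // wake on any bit, not all
            portMAX_DELAY);

        if (bits & MAIN_EVENT_WAKE_WORD_DETECTED) {
            // e.g. open the audio channel and start a conversation
        }
        if (bits & MAIN_EVENT_SEND_AUDIO) {
            // e.g. drain the send queue to the server
        }
        if (bits & MAIN_EVENT_VAD_CHANGE) {
            // e.g. update the listening state / UI
        }
    }
}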
2. audio_service initialization -> std::make_unique<AfeWakeWord>();
#if CONFIG_USE_AFE_WAKE_WORD
    wake_word_ = std::make_unique<AfeWakeWord>();
#elif CONFIG_USE_ESP_WAKE_WORD
    wake_word_ = std::make_unique<EspWakeWord>();
#elif CONFIG_USE_CUSTOM_WAKE_WORD
    wake_word_ = std::make_unique<CustomWakeWord>();
#else
    wake_word_ = nullptr;
#endif
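After constructing the engine, AudioService still needs to initialize it with the codec and route its detection event back to the callbacks registered in Application::Start(). A plausible sketch of that wiring inside AudioService follows; the setter name OnWakeWordDetected and the callbacks_ member are assumptions, only AfeWakeWord::Initialize(codec) is shown elsewhere in this article.

// Sketch of the wiring; names other than AfeWakeWord::Initialize() are assumptions.
if (wake_word_ != nullptr && wake_word_->Initialize(codec)) {
    wake_word_->OnWakeWordDetected([this](const std::string& wake_word) {
        // Forward the detection to the application-level callback set via SetCallbacks().
        if (callbacks_.on_wake_word_detected) {
            callbacks_.on_wake_word_detected(wake_word);
        }
    });
}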
3. AfeWakeWord initialization
srmodel_list_t *models = esp_srmodel_init("model");
afe_config_t *afe_config = afe_config_init("MMNR", models, AFE_TYPE_SR, AFE_MODE_HIGH_PERF);
Use afe_config_init() to obtain the default configuration, then adjust the parameters as needed:
- input_format: defines the channel layout, e.g. "MMNR"; each letter marks one channel (see the sketch after this list).
- models: the model list (e.g. NS, VAD, or WakeNet models).
- afe_type: the AFE type (e.g. AFE_TYPE_SR for speech-recognition scenarios).
- afe_mode: the performance mode (e.g. AFE_MODE_HIGH_PERF for high performance).
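In input_format, 'M' marks a microphone channel and 'R' the AEC reference channel, with 'N' typically marking an unused channel. The string is derived from the codec's channel layout; the sketch below trims that derivation out of the full Initialize() code further down, with codec and models standing in for the class members used there.

// Derive input_format from the codec's channel layout (mirrors AfeWakeWord::Initialize() below).
// Example: two mic channels plus one reference channel yields "MMR".
int ref_num = codec->input_reference() ? 1 : 0;   // 1 if the codec provides an AEC reference channel
std::string input_format;
for (int i = 0; i < codec->input_channels() - ref_num; i++) {
    input_format.push_back('M');                  // microphone channel
}
for (int i = 0; i < ref_num; i++) {
    input_format.push_back('R');                  // reference (playback loopback) channel
}
afe_config_t* afe_config = afe_config_init(input_format.c_str(), models, AFE_TYPE_SR, AFE_MODE_HIGH_PERF);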
Create the AFE instance from the configuration:
// Get the AFE interface handle
esp_afe_sr_iface_t *afe_handle = esp_afe_handle_from_config(afe_config);
// Create the AFE instance
esp_afe_sr_data_t *afe_data = afe_handle->create_from_config(afe_config);
Full code:
bool AfeWakeWord::Initialize(AudioCodec* codec) {
    codec_ = codec;
    int ref_num = codec_->input_reference() ? 1 : 0;

    // Load the wake word models; as many appear here as were enabled via "idf.py menuconfig"
    models_ = esp_srmodel_init("model");
    if (models_ == nullptr || models_->num == -1) {
        ESP_LOGE(TAG, "Failed to initialize wakenet model");
        return false;
    }
    for (int i = 0; i < models_->num; i++) {
        ESP_LOGI(TAG, "Model %d: %s", i, models_->model_name[i]);
        if (strstr(models_->model_name[i], ESP_WN_PREFIX) != NULL) {
            wakenet_model_ = models_->model_name[i];
            auto words = esp_srmodel_get_wake_words(models_, wakenet_model_);
            // split by ";" to get all wake words
            std::stringstream ss(words);
            std::string word;
            while (std::getline(ss, word, ';')) {
                wake_words_.push_back(word);
            }
        }
    }

    std::string input_format;
    for (int i = 0; i < codec_->input_channels() - ref_num; i++) {
        input_format.push_back('M');
    }
    for (int i = 0; i < ref_num; i++) {
        input_format.push_back('R');
    }
    afe_config_t* afe_config = afe_config_init(input_format.c_str(), models_, AFE_TYPE_SR, AFE_MODE_HIGH_PERF);
    afe_config->aec_init = codec_->input_reference();
    afe_config->aec_mode = AEC_MODE_SR_HIGH_PERF;
    afe_config->afe_perferred_core = 1;
    afe_config->afe_perferred_priority = 1;
    afe_config->memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM;

    afe_iface_ = esp_afe_handle_from_config(afe_config);
    afe_data_ = afe_iface_->create_from_config(afe_config);

    xTaskCreate([](void* arg) {
        auto this_ = (AfeWakeWord*)arg;
        this_->AudioDetectionTask();
        vTaskDelete(NULL);
    }, "audio_detection", 4096, this, 3, nullptr);

    return true;
}
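The detection task created above (shown in step 4) blocks on DETECTION_RUNNING_EVENT before each fetch, so detection is effectively gated by a single event bit. Only Stop() appears in the code shown in this article; the sketch below shows how that gating could look, with Start() as an assumed counterpart and the bodies as assumptions.

// Sketch, assuming detection is gated purely by the DETECTION_RUNNING_EVENT bit
// that AudioDetectionTask() waits on; Start() is an assumed name.
void AfeWakeWord::Start() {
    xEventGroupSetBits(event_group_, DETECTION_RUNNING_EVENT);    // let the task proceed to fetch
}

void AfeWakeWord::Stop() {
    xEventGroupClearBits(event_group_, DETECTION_RUNNING_EVENT);  // park the task at xEventGroupWaitBits
}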
4. AudioDetectionTask: audio monitoring and processing
Feed the audio data into the AFE for processing. The input data layout must match input_format (a fuller feed sketch follows the parameter list below):
int feed_chunksize = afe_handle->get_feed_chunksize(afe_data);
int feed_nch = afe_handle->get_feed_channel_num(afe_data);
int16_t *feed_buff = (int16_t *) malloc(feed_chunksize * feed_nch * sizeof(int16_t));
afe_handle->feed(afe_data, feed_buff);
- feed_chunksize: the number of samples per input frame.
- feed_nch: the number of input channels.
- feed_buff: channel-interleaved audio data (16-bit signed, 16 kHz).
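In AfeWakeWord the feed side is driven by the audio input path rather than by the detection task: captured PCM frames are pushed into the AFE, and the task only fetches results. A minimal sketch of such a feed method, assuming the samples are already 16-bit, 16 kHz and interleaved to match input_format; the method name Feed() is an assumption.

// Sketch: push one chunk of interleaved 16 kHz / 16-bit samples into the AFE.
// data.size() is expected to be feed_chunksize * feed_nch samples.
void AfeWakeWord::Feed(const std::vector<int16_t>& data) {
    if (afe_data_ == nullptr) {
        return;
    }
    afe_iface_->feed(afe_data_, data.data());
}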
Fetch the processed single-channel audio output and the detection state:
auto res = afe_iface_->fetch_with_delay(afe_data_, portMAX_DELAY);
void AfeWakeWord::AudioDetectionTask() {
    // Query the per-frame sample counts for feed and fetch
    auto fetch_size = afe_iface_->get_fetch_chunksize(afe_data_);
    auto feed_size = afe_iface_->get_feed_chunksize(afe_data_);
    ESP_LOGI(TAG, "Audio detection task started, feed size: %d fetch size: %d",
             feed_size, fetch_size);

    while (true) {
        xEventGroupWaitBits(event_group_, DETECTION_RUNNING_EVENT, pdFALSE, pdTRUE, portMAX_DELAY);

        // Blocking fetch: get the processed single-channel audio output and the detection state
        auto res = afe_iface_->fetch_with_delay(afe_data_, portMAX_DELAY);
        if (res == nullptr || res->ret_value == ESP_FAIL) {
            continue;
        }
        // Store the wake word data for voice recognition, like who is speaking
        StoreWakeWordData(res->data, res->data_size / sizeof(int16_t));

        if (res->wakeup_state == WAKENET_DETECTED) {
            Stop();
            last_detected_wake_word_ = wake_words_[res->wakenet_model_index - 1];
            if (wake_word_detected_callback_) {
                wake_word_detected_callback_(last_detected_wake_word_);
            }
        }
    }
}