当前位置: 首页 > news >正文

Unity 工具 之 Azure 微软连续语音识别ASR的简单整理

Unity 工具 之 Azure 微软连续语音识别ASR的简单整理

目录

Unity 工具 之 Azure 微软连续语音识别ASR的简单整理

一、简单介绍

二、实现原理

三、注意实现

四、实现步骤

 五、关键脚本


一、简单介绍

Unity 工具类,自己整理的一些游戏开发可能用到的模块,单独独立使用,方便游戏开发。

本节介绍,这里在使用微软的Azure 进行语音合成的两个方法的做简单整理,这里简单说明,如果你有更好的方法,欢迎留言交流。

官网注册:

面向学生的 Azure - 免费帐户额度 | Microsoft Azure

官网技术文档网址:

技术文档 | Microsoft Learn

官网的TTS:

语音转文本快速入门 - 语音服务 - Azure AI services | Microsoft Learn

Azure Unity SDK  包官网:

安装语音 SDK - Azure Cognitive Services | Microsoft Learn

SDK具体链接:

https://aka.ms/csspeech/unitypackage

二、实现原理

1、官网申请得到语音识别对应的 SPEECH_KEY 和 SPEECH_REGION

2、因为语音识别需要用到麦克风,移动端需要申请麦克风权限

3、开启语音识别,监听语音识别对应事件,即可获取到识别结果

三、注意实现

1、注意如果有卡顿什么的,注意主子线程切换,可能可以适当解决你的卡顿现象

2、注意电脑端(例如windows)运行可以不申请麦克风权限,但是移动端(例如Android)运行要申请麦克风权限,不然无法开启识别成功,可能会报错:Exception with an error code: 0x15

<uses-permission android:name="android.permission.RECORD_AUDIO"/>

System.ApplicationException: Exception with an error code: 0x15 at Microsoft.CognitiveServices.Speech.Internal.SpxExceptionThrower.ThrowIfFail (System.IntPtr hr) [0x00000] in <00000000000000000000000000000000>:0 at Microsoft.CognitiveServices.Speech.Recognizer.StartContinuousRecognition () [0x00000] in <00000000000000000000000000000000>:0 at Microsoft.CognitiveServices.Speech.Recognizer.DoAsyncRecognitionAction (System.Action recoImplAction) [0x00000] in <00000000000000000000000000000000>:0 at System.Threading.Tasks.Task.Execute () [0x00000] in <00000000000000000000000000000000>:0 at System.Threading.ExecutionContext.RunInternal (System.Threading.ExecutionContext executionContext, System.Threading.ContextCallback callback, System.Object state, System.Boolean preserveSyncCtx) [0x00000] in <00000000000000000000000000000000>:0 at System.Threading.Tasks.Task.ExecuteWithThreadLocal (System.Threading.Tasks.Task& currentTaskSlot) [0x00000] in

四、实现步骤

1、下载好SDK 导入

2、简单的搭建场景

3、编写对应脚本,测试语音识别功能

4、把测试脚本添加到场景中,并赋值

5、如果移动端,例如 Android 端,勾选如下,添加麦克风权限

<uses-permission android:name="android.permission.RECORD_AUDIO"/>

5、运行,点击对应按钮,开始识别,Console 中可以看到识别结果

 五、关键脚本

1、TestSpeechRecognitionHandler


using UnityEngine;
using UnityEngine.Android;
using UnityEngine.UI;public class TestSpeechRecognitionHandler : MonoBehaviour
{#region Data/// <summary>/// 按钮,文本/// </summary>public Button QuitButton;public Button ASRButton;public Button StopASRButton;public Text ASRText;/// <summary>/// m_SpeechAndKeywordRecognitionHandler/// </summary>SpeechRecognitionHandler m_SpeechAndKeywordRecognitionHandler;#endregion#region Liefecycle function/// <summary>/// Start/// </summary>void Start(){QuitButton.onClick.AddListener(OnClickQuitButton);ASRButton.onClick.AddListener(OnClickASRButton);StopASRButton.onClick.AddListener(OnClickStopASRButton);// 请求麦克风权限RequestMicrophonePermission();}/// <summary>/// 应用退出/// </summary>async void OnApplicationQuit() {await m_SpeechAndKeywordRecognitionHandler.StopContinuousRecognizer();}#endregion#region Private function/// <summary>/// RequestMicrophonePermission/// </summary>void RequestMicrophonePermission(){// 检查当前平台是否为 Androidif (Application.platform == RuntimePlatform.Android){// 检查是否已经授予麦克风权限if (!Permission.HasUserAuthorizedPermission(Permission.Microphone)){// 如果没有权限,请求用户授权Permission.RequestUserPermission(Permission.Microphone);}}else{// 在其他平台上,可以执行其他平台特定的逻辑Debug.LogWarning("Microphone permission is not needed on this platform.");}SpeechInitialized();}/// <summary>/// SpeechInitialized/// </summary>private void SpeechInitialized() {ASRText.text = "";m_SpeechAndKeywordRecognitionHandler = new SpeechRecognitionHandler();m_SpeechAndKeywordRecognitionHandler.onRecognizingAction = (str) => { Debug.Log("onRecognizingAction: " + str); };m_SpeechAndKeywordRecognitionHandler.onRecognizedSpeechAction = (str) => { Loom.QueueOnMainThread(() => ASRText.text += str);  Debug.Log("onRecognizedSpeechAction: " + str); };m_SpeechAndKeywordRecognitionHandler.onErrorAction = (str) => { Debug.Log("onErrorAction: " + str); };m_SpeechAndKeywordRecognitionHandler.Initialized();}/// <summary>/// OnClickQuitButton/// </summary>private void OnClickQuitButton() {
#if UNITY_EDITORUnityEditor.EditorApplication.isPlaying = false;#elseApplication.Quit();
#endif }/// <summary>/// OnClickASRButton/// </summary>private void OnClickASRButton() {m_SpeechAndKeywordRecognitionHandler.StartContinuousRecognizer();}/// <summary>/// OnClickStopASRButton/// </summary>private async void OnClickStopASRButton(){await m_SpeechAndKeywordRecognitionHandler.StopContinuousRecognizer();}#endregion
}

2、SpeechRecognitionHandler

using UnityEngine;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using System;
using Task = System.Threading.Tasks.Task;/// <summary>
/// 语音识别转文本和关键词识别
/// </summary>
public class SpeechRecognitionHandler
{#region Data/// <summary>/// /// </summary>const string TAG = "[SpeechAndKeywordRecognitionHandler] ";/// <summary>/// 识别配置/// </summary>private SpeechConfig m_SpeechConfig;/// <summary>/// 音频配置/// </summary>private AudioConfig m_AudioConfig;/// <summary>/// 语音识别/// </summary>private SpeechRecognizer m_SpeechRecognizer;/// <summary>/// LLM 大模型配置/// </summary>private ASRConfig m_ASRConfig;/// <summary>/// 识别的事件/// </summary>public Action<string> onRecognizingAction;public Action<string> onRecognizedSpeechAction;public Action<string> onErrorAction;public Action<string> onSessionStoppedAction;#endregion#region Public function/// <summary>/// 初始化/// </summary>/// <returns></returns>public async void Initialized(){m_ASRConfig = new ASRConfig();Debug.Log(TAG + "m_LLMConfig.AZURE_SPEECH_RECOGNITION_LANGUAGE " + m_ASRConfig.AZURE_SPEECH_RECOGNITION_LANGUAGE);Debug.Log(TAG + "m_LLMConfig.AZURE_SPEECH_REGION " + m_ASRConfig.AZURE_SPEECH_REGION);m_SpeechConfig = SpeechConfig.FromSubscription(m_ASRConfig.AZURE_SPEECH_KEY, m_ASRConfig.AZURE_SPEECH_REGION);m_SpeechConfig.SpeechRecognitionLanguage = m_ASRConfig.AZURE_SPEECH_RECOGNITION_LANGUAGE;m_AudioConfig = AudioConfig.FromDefaultMicrophoneInput();Debug.Log(TAG + " Initialized 2 ====");// 根据自己需要处理(不需要也行)await Task.Delay(100);}#endregion#region Private function/// <summary>/// 设置识别回调事件/// </summary>private void SetRecoginzeCallback(){Debug.Log(TAG + " SetRecoginzeCallback == ");if (m_SpeechRecognizer != null){m_SpeechRecognizer.Recognizing += OnRecognizing;m_SpeechRecognizer.Recognized += OnRecognized;m_SpeechRecognizer.Canceled += OnCanceled;m_SpeechRecognizer.SessionStopped += OnSessionStopped;Debug.Log(TAG+" SetRecoginzeCallback OK ");}}#endregion#region Callback/// <summary>/// 正在识别/// </summary>/// <param name="s"></param>/// <param name="e"></param>private void OnRecognizing(object s, SpeechRecognitionEventArgs e){Debug.Log(TAG + "RecognizingSpeech:" + e.Result.Text + " :[e.Result.Reason]:" + e.Result.Reason);if (e.Result.Reason == ResultReason.RecognizingSpeech ){Debug.Log(TAG + " Trigger onRecognizingAction is null :" + onRecognizingAction == null);onRecognizingAction?.Invoke(e.Result.Text);}}/// <summary>/// 识别结束/// </summary>/// <param name="s"></param>/// <param name="e"></param>private void OnRecognized(object s, SpeechRecognitionEventArgs e){Debug.Log(TAG + "RecognizedSpeech:" + e.Result.Text + " :[e.Result.Reason]:" + e.Result.Reason);if (e.Result.Reason == ResultReason.RecognizedSpeech ){bool tmp = onRecognizedSpeechAction == null;Debug.Log(TAG + " Trigger onRecognizedSpeechAction is null :" + tmp);onRecognizedSpeechAction?.Invoke(e.Result.Text);}}/// <summary>/// 识别取消/// </summary>/// <param name="s"></param>/// <param name="e"></param>private void OnCanceled(object s, SpeechRecognitionCanceledEventArgs e){Debug.LogFormat(TAG+"Canceled: Reason={0}", e.Reason );if (e.Reason == CancellationReason.Error){onErrorAction?.Invoke(e.ErrorDetails);}}/// <summary>/// 会话结束/// </summary>/// <param name="s"></param>/// <param name="e"></param>private void OnSessionStopped(object s, SessionEventArgs e){Debug.Log(TAG+"Session stopped event." );onSessionStoppedAction?.Invoke("Session stopped event.");}#endregion#region 连续语音识别转文本/// <summary>/// 开启连续语音识别转文本/// </summary>public void StartContinuousRecognizer(){Debug.LogWarning(TAG + "StartContinuousRecognizer");try{// 转到异步中(根据自己需要处理)Loom.RunAsync(async () => {try{if (m_SpeechRecognizer != null){m_SpeechRecognizer.Dispose();m_SpeechRecognizer = null;}if (m_SpeechRecognizer == null){m_SpeechRecognizer = new SpeechRecognizer(m_SpeechConfig, m_AudioConfig);SetRecoginzeCallback();}await m_SpeechRecognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);Loom.QueueOnMainThread(() => {Debug.LogWarning(TAG + "StartContinuousRecognizer QueueOnMainThread ok");});Debug.LogWarning(TAG + "StartContinuousRecognizer RunAsync ok");}catch (Exception e){Loom.QueueOnMainThread(() =>{Debug.LogError(TAG + " StartContinuousRecognizer 0 " + e);});}});}catch (Exception e){Debug.LogError(TAG + " StartContinuousRecognizer 1 " + e);}}/// <summary>/// 结束连续语音识别转文本/// </summary>public async Task StopContinuousRecognizer(){try{if (m_SpeechRecognizer != null){await m_SpeechRecognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);//m_SpeechRecognizer.Dispose();//m_SpeechRecognizer = null;Debug.LogWarning(TAG + " StopContinuousRecognizer");}}catch (Exception e){Debug.LogError(TAG + " StopContinuousRecognizer Exception : " + e);}}#endregion}

3、ASRConfig

public class ASRConfig 
{#region Azure ASR/// <summary>/// AZURE_SPEECH_KEY/// </summary>public virtual string AZURE_SPEECH_KEY { get; } = @"098dc5aa21a14758a347dcac36d7eb66";/// <summary>/// AZURE_SPEECH_REGION/// </summary>public virtual string AZURE_SPEECH_REGION { get; } = @"eastasia";/// <summary>/// AZURE_SPEECH_RECOGNITION_LANGUAGE/// </summary>public virtual string AZURE_SPEECH_RECOGNITION_LANGUAGE { get; } = @"zh-CN";#endregion
}

http://www.lryc.cn/news/281035.html

相关文章:

  • MLP-Mixer: An all-MLP Architecture for Vision
  • redis前缀匹配数据迁移数据
  • 云贝教育 |【技术文章】存储对象的LIBRARY CACHE LOCK/PIN实验(一)
  • C# 快速模指数运算 快速求余运算
  • Chisel入门初步0
  • MySQL 8.0中移除的功能(一)
  • 可抓取性和可索引性:它们是什么以及如何影响搜索引擎优化
  • Django教程第4章 | Web开发实战-三种验证码实现
  • 深度探讨 Golang 中并发发送 HTTP 请求的最佳技术
  • VUE指令(二)
  • 开源对象存储服务器MinIO本地部署并结合内网穿透实现远程访问管理界面
  • 【TypeScript】tsconfig.json文件到底是干啥的?作用是什么?
  • wagtail的数据模型和渲染
  • OpenHarmony4.0适配LVDS屏幕驱动
  • 【playwright】新一代自动化测试神器playwright+python系列课程01-playwright驱动浏览器
  • POSIX API与网络协议栈
  • 互联网加竞赛 基于卷积神经网络的乳腺癌分类 深度学习 医学图像
  • 腾讯云 IPv6 解决方案
  • Appium 自动化测试
  • 深入浅出Android dmabuf_dump工具
  • Guava RateLimiter预热模型
  • 【搭建个人知识库-3】
  • 如何看待 Linux 内核邮件列表重启将内核中的 C 代码转换为 C++
  • springboot网关添加swagger
  • 代码随想录 Leetcode383. 赎金信
  • 上下左右视频转场模板PR项目工程文件 Vol. 05
  • 【正点原子STM32连载】第三十三章 单通道ADC采集实验 摘自【正点原子】APM32E103最小系统板使用指南
  • Linux系统使用docker部署Geoserver(简单粗暴,复制即用)
  • libcurl使用默认编译的winssl进行https的双向认证
  • MySQL运维实战(3.3) 管理数据库(database)