【数字人开发】Unity+百度智能云平台实现短语音文本识别功能
一、创建自己的语音识别应用
百度智能云控制台网址:https://console.bce.baidu.com/
1、创建应用
2、获取APIKey和SecretKey
3、Api调试
调试网址:https://console.bce.baidu.com/support/?timestamp=1750317430400#/api?product=AI&project=%E8%AF%AD%E9%9F%B3%E6%8A%80%E6%9C%AF&parent=%E9%89%B4%E6%9D%83%E8%AE%A4%E8%AF%81%E6%9C%BA%E5%88%B6&api=oauth%2F2.0%2Ftoken&method=post
二、在Unity中进行调用
1、短语音识别标准版相关参数
2、完整代码
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using UnityEngine;
using UnityEngine.Networking;
using Newtonsoft.Json;
using UnityEngine.UI;

/// <summary>
/// Unity component that sends a short mono 16 kHz <see cref="AudioClip"/> to the
/// Baidu short-speech recognition endpoint and writes the recognized text to a UI label.
/// An access token is requested in <see cref="Start"/>; recognition is refused until
/// that token has been stored.
/// </summary>
public class ASR : MonoBehaviour
{
    // NOTE(review): credentials are hard-coded as field defaults. The values are kept so
    // existing scenes keep working, but they should be supplied through the Inspector or
    // external configuration and the committed keys rotated.
    [Header("百度语音识别配置")]
    [SerializeField] private string apiKey = "LFfK6DTaswy6LLtBqvHO86w0";
    [SerializeField] private string secretKey = "vj6JmKd7zBylDVGW2WmTNPWl9eKxxZEL";
    [SerializeField] private string accessToken;

    [Space]
    [Header("短语音识别标准版参数设置")]
    [SerializeField] private string format = "pcm";
    [SerializeField] private int rate = 16000;
    [SerializeField] private int channel = 1;
    [SerializeField] private string cuid = "240a906f2b88794fd0426442c4136a5a57bf5c01";
    [SerializeField] private int dev_pid = 1537;

    [Space]
    [Header("测试使用")]
    public AudioSource audioSource;

    [Space]
    [Header("UI相关")]
    public Button buttonStartASR;
    public Button buttonRecord;
    public Text textResult;

    /// <summary>
    /// Requests the access token and wires the "start recognition" button.
    /// </summary>
    private void Start()
    {
        // Authenticate immediately so a token is normally available before the first click.
        StartCoroutine(GetAccessToken());

        if (buttonStartASR != null)
        {
            buttonStartASR.onClick.AddListener(OnStartAsrClicked);
        }
        else
        {
            Debug.LogWarning("buttonStartASR 未设置,无法通过按钮触发识别");
        }
    }

    /// <summary>
    /// Unsubscribes the click handler registered in <see cref="Start"/>.
    /// </summary>
    private void OnDestroy()
    {
        if (buttonStartASR != null)
        {
            buttonStartASR.onClick.RemoveListener(OnStartAsrClicked);
        }
    }

    /// <summary>
    /// Click handler: recognizes the clip currently assigned to <see cref="audioSource"/>
    /// and shows the result in <see cref="textResult"/>; errors are logged.
    /// </summary>
    private void OnStartAsrClicked()
    {
        print("开始识别");

        // A missing source or clip is passed on as null; the conversion step reports it.
        AudioClip clip = audioSource != null ? audioSource.clip : null;

        StartCoroutine(RecognizeFromClip(
            clip,
            text =>
            {
                if (textResult != null)
                {
                    textResult.text = text;
                }
            },
            error => Debug.Log(error)));
    }

    #region 短语音识别相关

    /// <summary>
    /// Recognizes speech in <paramref name="clip"/> through the Baidu short-speech API.
    /// </summary>
    /// <param name="clip">Audio to recognize; must be mono, 16 kHz.</param>
    /// <param name="onSuccess">Invoked with the recognized text when the service reports err_no 0.</param>
    /// <param name="onError">Invoked with a message when the token is missing, the clip cannot be
    /// converted, the request fails, or the service reports an error.</param>
    /// <returns>Coroutine enumerator; run it with <c>StartCoroutine</c>.</returns>
    public IEnumerator RecognizeFromClip(AudioClip clip, Action<string> onSuccess, Action<string> onError)
    {
        // IsNullOrEmpty rather than "== null": Unity gives an inspector-serialized string an
        // empty value instead of null, so a null-only check would let requests through without a token.
        if (string.IsNullOrEmpty(accessToken))
        {
            onError?.Invoke("accessToken未获取");
            yield break;
        }

        // Convert the clip to 16-bit PCM bytes.
        byte[] pcmData = ConvertClipToPCM16(clip);
        if (pcmData == null)
        {
            onError?.Invoke("音频格式错误或转换失败");
            yield break;
        }

        var requestData = new
        {
            format = format,
            rate = rate,
            channel = channel,
            cuid = cuid,
            token = accessToken,
            dev_pid = dev_pid,
            speech = Convert.ToBase64String(pcmData),
            // Byte length of the raw PCM data, not of the base64 text.
            len = pcmData.Length
        };
        string jsonBody = JsonConvert.SerializeObject(requestData);

        using (UnityWebRequest request = new UnityWebRequest("https://vop.baidu.com/server_api", "POST"))
        {
            request.uploadHandler = new UploadHandlerRaw(Encoding.UTF8.GetBytes(jsonBody));
            request.downloadHandler = new DownloadHandlerBuffer();
            request.SetRequestHeader("Content-Type", "application/json");
            request.SetRequestHeader("Accept", "application/json");

            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success)
            {
                onError?.Invoke("网络错误: " + request.error);
                yield break;
            }

            string responseText = request.downloadHandler.text;
            Debug.Log("🎤 识别结果: " + responseText);
            DispatchRecognitionResult(responseText, onSuccess, onError);
        }
    }

    /// <summary>
    /// Parses the recognition response body and routes it to the matching callback.
    /// Kept outside the iterator so JSON failures can be caught and reported via
    /// <paramref name="onError"/> instead of escaping from the coroutine.
    /// </summary>
    /// <param name="responseText">Raw JSON body returned by the service.</param>
    /// <param name="onSuccess">Receives the joined result strings when err_no is 0.</param>
    /// <param name="onError">Receives a message for parse failures or service errors.</param>
    private static void DispatchRecognitionResult(string responseText, Action<string> onSuccess, Action<string> onError)
    {
        ASRResponse response;
        try
        {
            response = JsonConvert.DeserializeObject<ASRResponse>(responseText);
        }
        catch (JsonException ex)
        {
            onError?.Invoke("识别结果解析失败: " + ex.Message);
            return;
        }

        if (response == null)
        {
            onError?.Invoke("识别结果解析失败: 响应为空");
            return;
        }

        if (response.err_no != 0)
        {
            onError?.Invoke($"识别失败(错误码{response.err_no}):{response.err_msg}");
            return;
        }

        // string.Join throws on a null array, so a missing result list is reported as empty text.
        string text = response.result != null ? string.Join("", response.result) : string.Empty;
        onSuccess?.Invoke(text);
    }

    /// <summary>
    /// Converts an <see cref="AudioClip"/> to little-endian signed 16-bit PCM bytes.
    /// </summary>
    /// <param name="clip">Clip to convert; must be mono and sampled at 16000 Hz.</param>
    /// <returns>The PCM byte array, or <c>null</c> when the clip is missing, empty,
    /// has an unsupported format, or its sample data cannot be read.</returns>
    private byte[] ConvertClipToPCM16(AudioClip clip)
    {
        if (clip == null)
        {
            Debug.LogError("❌ 未提供待识别的音频");
            return null;
        }

        if (clip.channels != 1 || clip.frequency != 16000)
        {
            Debug.LogError("❌ 仅支持 16kHz 单通道音频");
            return null;
        }

        if (clip.samples <= 0)
        {
            Debug.LogError("❌ 音频为空");
            return null;
        }

        // Mono clip: one float per sample frame.
        float[] samples = new float[clip.samples];
        if (!clip.GetData(samples, 0))
        {
            Debug.LogError("❌ 无法读取音频采样数据");
            return null;
        }

        byte[] pcm = new byte[samples.Length * 2]; // 16-bit = 2 bytes per sample
        for (int i = 0; i < samples.Length; i++)
        {
            // Clamp first: values outside [-1, 1] would overflow the cast to short.
            float clamped = Mathf.Clamp(samples[i], -1f, 1f);
            short value = (short)(clamped * short.MaxValue);

            // Write low byte then high byte explicitly so the output does not depend on
            // platform byte order and no per-sample array is allocated.
            pcm[i * 2] = (byte)(value & 0xFF);
            pcm[i * 2 + 1] = (byte)((value >> 8) & 0xFF);
        }

        return pcm;
    }

    /// <summary>
    /// JSON shape of the short-speech recognition response.
    /// Field names mirror the JSON keys and must not be renamed.
    /// </summary>
    [Serializable]
    public class ASRResponse
    {
        public int err_no;
        public string err_msg;
        public string sn;
        public string[] result;
    }

    #endregion

    #region 鉴权相关

    /// <summary>
    /// Requests a Baidu access token with the configured API key and secret key and stores it
    /// in <c>accessToken</c>. Failures are logged; the stored token is left unchanged.
    /// </summary>
    /// <returns>Coroutine enumerator; run it with <c>StartCoroutine</c>.</returns>
    public IEnumerator GetAccessToken()
    {
        const string url = "https://aip.baidubce.com/oauth/2.0/token";

        WWWForm form = new WWWForm();
        form.AddField("grant_type", "client_credentials");
        form.AddField("client_id", apiKey);
        form.AddField("client_secret", secretKey);

        using (UnityWebRequest request = UnityWebRequest.Post(url, form))
        {
            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("❌ 短语音识别获取 AccessToken 失败: " + request.error);
                yield break;
            }

            try
            {
                var tokenResponse = JsonConvert.DeserializeObject<TokenResponse>(request.downloadHandler.text);

                // A body without access_token must not be reported as success.
                if (tokenResponse == null || string.IsNullOrEmpty(tokenResponse.access_token))
                {
                    Debug.LogError("❌ 短语音识别AccessToken 解析失败: 响应中缺少 access_token");
                }
                else
                {
                    accessToken = tokenResponse.access_token;
                    Debug.Log("✅ 短语音识别获取 AccessToken 成功: " + accessToken);
                }
            }
            catch (Exception ex)
            {
                Debug.LogError("❌ 短语音识别AccessToken 解析失败: " + ex.Message);
            }
        }
    }

    /// <summary>
    /// JSON shape of the token response; only the access token is read.
    /// </summary>
    [Serializable]
    public class TokenResponse
    {
        public string access_token;
    }

    #endregion
}