【数字人开发】Unity+百度智能云平台实现长短文本个性化语音生成功能
一、创建自己的应用
百度智能云控制台网址:https://console.bce.baidu.com/
1、创建应用
2、获取APIKey和SecretKey
3、Api调试
调试网址:https://console.bce.baidu.com/support/?timestamp=1750317430400#/api?product=AI&project=%E8%AF%AD%E9%9F%B3%E6%8A%80%E6%9C%AF&parent=%E9%89%B4%E6%9D%83%E8%AE%A4%E8%AF%81%E6%9C%BA%E5%88%B6&api=oauth%2F2.0%2Ftoken&method=post
二、在Unity中进行调用
1、相关参数说明
(1)短文本个性化语音生成相关参数
(2)长文本个性化语音生成相关参数
2、完整代码
using Newtonsoft.Json;
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
using UnityEngine;
using UnityEngine.Networking;
using UnityEngine.UI;
/// <summary>
/// Unity component that turns text into speech through the Baidu AI Cloud TTS services.
/// Texts shorter than <see cref="ShortTextMaxLength"/> characters go through the synchronous
/// short-text endpoint; longer texts go through the asynchronous long-text task API
/// (create task, poll its status, download the resulting audio).
/// </summary>
public class TTS : MonoBehaviour
{
    #region Parameters

    /// <summary>Texts with fewer characters than this use the short-text endpoint.</summary>
    private const int ShortTextMaxLength = 60;

    /// <summary>Delay, in seconds, between two status queries of a long-text task.</summary>
    private const float QueryIntervalSeconds = 2f;

    // SECURITY: credentials must never be committed to source control. They are
    // intentionally empty here; fill them in through the Inspector (or inject them
    // from a secure configuration source) before running.
    [Header("鉴权相关参数")]
    [SerializeField] private string apiKey = "";
    [SerializeField] private string secretKey = "";
    [SerializeField] private string accessToken = null;

    [Space]
    [Header("长文本语音合成参数设置")]
    [SerializeField] private string format = "mp3-16k"; // or "wav"
    [SerializeField] private int voice = 0;             // voice id: 0 = female, 1 = male, ...
    [SerializeField] private string lang = "zh";
    [SerializeField] private int speed = 5;             // 0~15
    [SerializeField] private int pitch = 5;             // 0~15
    [SerializeField] private int volume = 5;            // 0~15
    [SerializeField] private int enable_subtitle = 0;
    // Upper bound for polling a long-text task; values <= 0 disable the limit.
    [SerializeField] private float queryTimeoutSeconds = 600f;

    [Space]
    [Header("短文本语音合成参数设置")]
    [SerializeField] private string cuid = "240a906f2b88794fd0426442c4136a5a57bf5c01";
    [SerializeField] private string ctp = "1";
    [SerializeField] private string lan = "zh";
    [SerializeField] private string spd = "5";
    [SerializeField] private string pit = "5";
    [SerializeField] private string vol = "10";
    [SerializeField] private string per = "1";
    [SerializeField] private string aue = "3";

    [Space]
    [Header("UI界面相关")]
    public InputField inputFieldText;
    public Button buttonStartTTS;   // starts the synthesis
    public Button buttonPlay;       // plays the synthesized clip
    public AudioSource audioSource; // component used for playback

    #endregion

    #region Unity lifecycle and UI handlers

    /// <summary>Requests an access token and wires the UI buttons.</summary>
    private void Start()
    {
        // Authenticate as early as possible so the token is usually ready before the first click.
        StartCoroutine(GetAccessToken());

        buttonStartTTS.onClick.AddListener(OnStartTTSClicked);
        buttonPlay.onClick.AddListener(OnPlayClicked);
    }

    /// <summary>Removes the button listeners registered in <see cref="Start"/>.</summary>
    private void OnDestroy()
    {
        if (buttonStartTTS != null)
        {
            buttonStartTTS.onClick.RemoveListener(OnStartTTSClicked);
        }

        if (buttonPlay != null)
        {
            buttonPlay.onClick.RemoveListener(OnPlayClicked);
        }
    }

    /// <summary>Click handler: synthesizes the text currently in the input field.</summary>
    private void OnStartTTSClicked()
    {
        StartTTS(inputFieldText.text, audioSource);
    }

    /// <summary>Click handler: plays the last synthesized clip, if there is one.</summary>
    private void OnPlayClicked()
    {
        if (audioSource.clip != null)
        {
            audioSource.Play();
        }
    }

    #endregion

    #region Entry point

    /// <summary>
    /// Synthesizes <paramref name="text"/> and assigns the resulting clip to
    /// <paramref name="audioSource"/>. The short-text or long-text service is chosen
    /// from the text length.
    /// </summary>
    /// <param name="text">Text to synthesize.</param>
    /// <param name="audioSource">Audio source that receives the synthesized clip.</param>
    public void StartTTS(string text, AudioSource audioSource)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            Debug.LogWarning("[TTS] 合成文本为空,已忽略");
            return;
        }

        if (audioSource == null)
        {
            Debug.LogError("[TTS] AudioSource 为空,无法接收合成结果");
            return;
        }

        StartCoroutine(SynthesizeRoutine(text, audioSource));
    }

    /// <summary>
    /// Makes sure an access token is available, then runs the short- or long-text pipeline.
    /// </summary>
    private IEnumerator SynthesizeRoutine(string text, AudioSource target)
    {
        // The token request started in Start() may still be in flight or may have failed;
        // request one on demand instead of calling the API with an empty token.
        if (string.IsNullOrEmpty(accessToken))
        {
            yield return StartCoroutine(GetAccessToken());

            if (string.IsNullOrEmpty(accessToken))
            {
                Debug.LogError("❌ 尚未获取到 AccessToken,无法进行语音合成");
                yield break;
            }
        }

        if (text.Length < ShortTextMaxLength)
        {
            print("开始短文本语音合成");
            yield return StartCoroutine(ShortTTS(text, response =>
            {
                // Only touch the audio source when a clip was actually produced.
                if (response != null && response.Success && response.clip != null)
                {
                    target.clip = response.clip;
                    print("短文本语音合成结束,请播放");
                }
                else
                {
                    Debug.LogError("❌ 短文本语音合成失败: " + response?.error_msg);
                }
            }));
        }
        else
        {
            print("开始长文本语音合成");
            yield return StartCoroutine(LongTTS(text, clip =>
            {
                target.clip = clip;
                print("长文本语音合成结束,请播放");
            }));
        }
    }

    #endregion

    #region Authentication

    /// <summary>
    /// Requests a Baidu access token with the configured API key / secret key and stores it
    /// in <c>accessToken</c>. Failures are logged; the stored token is left unchanged.
    /// </summary>
    /// <returns>Coroutine enumerator.</returns>
    public IEnumerator GetAccessToken()
    {
        if (string.IsNullOrEmpty(apiKey) || string.IsNullOrEmpty(secretKey))
        {
            Debug.LogError("❌ 未配置 apiKey / secretKey,请在 Inspector 中填写");
            yield break;
        }

        const string url = "https://aip.baidubce.com/oauth/2.0/token";

        WWWForm form = new WWWForm();
        form.AddField("grant_type", "client_credentials");
        form.AddField("client_id", apiKey);
        form.AddField("client_secret", secretKey);

        using (UnityWebRequest request = UnityWebRequest.Post(url, form))
        {
            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("❌ 获取 语音合成AccessToken 失败: " + request.error);
                yield break;
            }

            try
            {
                var tokenResponse = JsonConvert.DeserializeObject<TokenResponse>(request.downloadHandler.text);
                if (tokenResponse == null || string.IsNullOrEmpty(tokenResponse.access_token))
                {
                    Debug.LogError("❌ 语音合成AccessToken 解析失败: 响应中没有 access_token");
                }
                else
                {
                    accessToken = tokenResponse.access_token;
                    // The token is a credential: report success without printing it.
                    Debug.Log("✅ 获取语音合成 AccessToken 成功");
                }
            }
            catch (JsonException ex)
            {
                Debug.LogError("❌ 语音合成AccessToken 解析失败: " + ex.Message);
            }
        }
    }

    #endregion

    #region Short-text synthesis

    /// <summary>
    /// Requests short-text synthesis and decodes the response as an MPEG audio clip.
    /// </summary>
    /// <param name="text">Text to synthesize.</param>
    /// <param name="callback">
    /// Receives the result. On success <see cref="TtsResponse.clip"/> is set; on failure
    /// <see cref="TtsResponse.error_index"/> is negative and
    /// <see cref="TtsResponse.error_msg"/> describes the problem.
    /// </param>
    /// <returns>Coroutine enumerator.</returns>
    public IEnumerator ShortTTS(string text, Action<TtsResponse> callback)
    {
        if (string.IsNullOrEmpty(accessToken))
        {
            callback?.Invoke(new TtsResponse
            {
                error_index = -3,
                error_msg = "AccessToken 为空,请先完成鉴权"
            });
            yield break;
        }

        var param = new Dictionary<string, string>
        {
            { "tex", text ?? string.Empty },
            { "tok", accessToken },
            { "cuid", cuid },
            { "ctp", ctp },
            { "lan", lan },
            { "spd", spd },
            { "pit", pit },
            { "vol", vol },
            { "per", per },
            { "aue", aue }
        };

        // HTTPS keeps the token in the query string off the wire in clear text.
        var url = new StringBuilder("https://tsn.baidu.com/text2audio");
        char separator = '?';
        foreach (var p in param)
        {
            url.Append(separator)
               .Append(p.Key)
               .Append('=')
               .Append(UnityWebRequest.EscapeURL(p.Value ?? string.Empty));
            separator = '&';
        }

        using (UnityWebRequest www = UnityWebRequestMultimedia.GetAudioClip(url.ToString(), AudioType.MPEG))
        {
            yield return www.SendWebRequest();

            if (www.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("[TTS] 请求失败: " + www.error);
                callback?.Invoke(new TtsResponse
                {
                    error_index = -1,
                    error_msg = www.error
                });
                yield break;
            }

            // The request can succeed at HTTP level yet carry a textual error body,
            // so the content type decides whether the payload is audio.
            string type = www.GetResponseHeader("Content-Type");
            if (!string.IsNullOrEmpty(type) && type.Contains("audio"))
            {
                AudioClip clip = DownloadHandlerAudioClip.GetContent(www);
                callback?.Invoke(new TtsResponse { clip = clip });
            }
            else
            {
                byte[] raw = www.downloadHandler.data;
                string errorText = raw != null ? Encoding.UTF8.GetString(raw) : string.Empty;
                Debug.LogError("[TTS] 文本响应错误: " + errorText);
                callback?.Invoke(new TtsResponse
                {
                    error_index = -2,
                    error_msg = errorText
                });
            }
        }
    }

    /// <summary>
    /// Result of a short-text synthesis request.
    /// </summary>
    public class TtsResponse
    {
        public int error_index;
        public string error_msg;
        public string sn;
        public int idx;
        /// <summary>True when <see cref="error_index"/> is 0.</summary>
        public bool Success => error_index == 0;
        public AudioClip clip;
    }

    #endregion

    #region Long-text synthesis

    /// <summary>
    /// Runs the long-text pipeline in order: create the task, poll it until it finishes,
    /// then download the audio.
    /// </summary>
    /// <param name="text">Text to synthesize.</param>
    /// <param name="callback">Invoked with the downloaded clip; not invoked on failure.</param>
    /// <returns>Coroutine enumerator.</returns>
    IEnumerator LongTTS(String text, Action<AudioClip> callback)
    {
        // Unity serializes a null string field as "", so a plain null check is not enough.
        if (string.IsNullOrEmpty(accessToken))
        {
            Debug.LogError("❌ 合成失败: AccessToken 为空");
            yield break;
        }

        string taskId = null;
        yield return StartCoroutine(CreateTTSTask(
            text,
            createdId => { taskId = createdId; },
            errorMsg => { Debug.LogError("❌ 合成失败: " + errorMsg); }));

        if (string.IsNullOrEmpty(taskId))
        {
            yield break;
        }

        string audioUrl = null;
        yield return StartCoroutine(QueryTTSTaskStatus(
            accessToken,
            taskId,
            address => { audioUrl = address; },
            errorMsg => { Debug.LogError("❌ 查询失败:" + errorMsg); }));

        if (string.IsNullOrEmpty(audioUrl))
        {
            yield break;
        }

        yield return StartCoroutine(DownloadAudio(
            audioUrl,
            clip =>
            {
                if (clip != null)
                {
                    callback?.Invoke(clip);
                }
                else
                {
                    Debug.LogError("下载的音频 Clip 为 null");
                }
            },
            ResolveLongAudioType()));
    }

    /// <summary>
    /// Maps the configured long-text <c>format</c> to the decoder used for the download:
    /// WAV when the format starts with "wav", MPEG otherwise.
    /// </summary>
    private AudioType ResolveLongAudioType()
    {
        bool isWav = !string.IsNullOrEmpty(format)
                     && format.StartsWith("wav", StringComparison.OrdinalIgnoreCase);
        return isWav ? AudioType.WAV : AudioType.MPEG;
    }

    /// <summary>
    /// Creates a long-text synthesis task.
    /// </summary>
    /// <param name="text">Text to synthesize.</param>
    /// <param name="onSuccess">Receives the id of the created task.</param>
    /// <param name="onError">Receives an error description when the task cannot be created.</param>
    /// <returns>Coroutine enumerator.</returns>
    public IEnumerator CreateTTSTask(string text, Action<string> onSuccess, Action<string> onError)
    {
        if (string.IsNullOrEmpty(accessToken))
        {
            onError?.Invoke("AccessToken 为空,请先完成鉴权");
            yield break;
        }

        string url = $"https://aip.baidubce.com/rpc/2.0/tts/v1/create?access_token={UnityWebRequest.EscapeURL(accessToken)}";

        var bodyObj = new
        {
            text = text,
            format = format,
            voice = voice,
            lang = lang,
            speed = speed,
            pitch = pitch,
            volume = volume,
            enable_subtitle = enable_subtitle
        };
        byte[] bodyRaw = Encoding.UTF8.GetBytes(JsonConvert.SerializeObject(bodyObj));

        using (UnityWebRequest request = new UnityWebRequest(url, "POST"))
        {
            request.uploadHandler = new UploadHandlerRaw(bodyRaw);
            request.downloadHandler = new DownloadHandlerBuffer();
            request.SetRequestHeader("Content-Type", "application/json");
            request.SetRequestHeader("Accept", "application/json");

            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("❌ 网络请求失败:" + request.error);
                onError?.Invoke(request.error);
                yield break;
            }

            string responseText = request.downloadHandler.text;
            Debug.Log("✅ 创建语音任务返回:" + responseText);

            // Decide from the parsed fields rather than from substring matches on the raw body.
            string taskId = null;
            string errorMessage = null;
            try
            {
                var created = JsonConvert.DeserializeObject<TTSTaskSuccessResponse>(responseText);
                if (created != null && !string.IsNullOrEmpty(created.TaskId))
                {
                    taskId = created.TaskId;
                }
                else
                {
                    var failure = JsonConvert.DeserializeObject<TTSTaskErrorResponse>(responseText);
                    errorMessage = failure != null && !string.IsNullOrEmpty(failure.ErrorMsg)
                        ? failure.ErrorMsg
                        : "无法识别的返回内容";
                }
            }
            catch (JsonException ex)
            {
                errorMessage = "无法识别的返回内容: " + ex.Message;
            }

            // Callbacks run outside the try block so their own exceptions are not
            // mistaken for JSON parsing failures.
            if (taskId != null)
            {
                onSuccess?.Invoke(taskId);
            }
            else
            {
                onError?.Invoke(errorMessage);
            }
        }
    }

    /// <summary>
    /// Polls a long-text synthesis task until it succeeds, fails, reports an unknown state,
    /// or the configured timeout elapses.
    /// </summary>
    /// <param name="accessToken">Access token used for the query.</param>
    /// <param name="taskId">Id of the task to query.</param>
    /// <param name="onSuccess">Receives the download URL of the synthesized audio.</param>
    /// <param name="onError">Receives an error description.</param>
    /// <returns>Coroutine enumerator.</returns>
    public IEnumerator QueryTTSTaskStatus(string accessToken, string taskId, Action<string> onSuccess, Action<string> onError)
    {
        if (string.IsNullOrEmpty(accessToken) || string.IsNullOrEmpty(taskId))
        {
            onError?.Invoke("AccessToken 或任务 id 为空");
            yield break;
        }

        string url = $"https://aip.baidubce.com/rpc/2.0/tts/v1/query?access_token={UnityWebRequest.EscapeURL(accessToken)}";
        byte[] body = Encoding.UTF8.GetBytes(
            JsonConvert.SerializeObject(new { task_ids = new string[] { taskId } }));
        float startedAt = Time.realtimeSinceStartup;

        while (true)
        {
            string json;
            using (UnityWebRequest request = new UnityWebRequest(url, "POST"))
            {
                request.uploadHandler = new UploadHandlerRaw(body);
                request.downloadHandler = new DownloadHandlerBuffer();
                request.SetRequestHeader("Content-Type", "application/json");
                request.SetRequestHeader("Accept", "application/json");

                yield return request.SendWebRequest();

                if (request.result != UnityWebRequest.Result.Success)
                {
                    onError?.Invoke("网络错误:" + request.error);
                    yield break;
                }

                json = request.downloadHandler.text;
            }

            // The request is disposed above, before parsing and before any waiting.
            TTSQueryResponse root = null;
            string parseError = null;
            try
            {
                root = JsonConvert.DeserializeObject<TTSQueryResponse>(json);
            }
            catch (JsonException ex)
            {
                parseError = ex.Message;
            }

            if (parseError != null)
            {
                onError?.Invoke("无法解析查询结果: " + parseError);
                yield break;
            }

            if (root == null || root.TasksInfo == null || root.TasksInfo.Count == 0)
            {
                onError?.Invoke("未找到任务信息");
                yield break;
            }

            var task = root.TasksInfo[0];
            string status = task?.TaskStatus;
            switch (status)
            {
                case "Success":
                    if (!string.IsNullOrEmpty(task.TaskResult?.SpeechUrl))
                    {
                        onSuccess?.Invoke(task.TaskResult.SpeechUrl);
                    }
                    else
                    {
                        onError?.Invoke("合成成功但未返回语音地址");
                    }
                    yield break;

                case "Failure":
                    onError?.Invoke(task.TaskResult?.ErrMsg ?? "未知错误");
                    yield break;

                case "Running":
                    Debug.Log("🎙 正在合成...");
                    break;

                default:
                    onError?.Invoke("未知状态:" + status);
                    yield break;
            }

            // Still running: stop once the time budget is used up so the coroutine
            // cannot poll forever.
            if (queryTimeoutSeconds > 0f
                && Time.realtimeSinceStartup - startedAt >= queryTimeoutSeconds)
            {
                onError?.Invoke("查询超时,任务仍在合成中");
                yield break;
            }

            yield return new WaitForSeconds(QueryIntervalSeconds);
        }
    }

    /// <summary>
    /// Downloads an audio file and hands the decoded clip to <paramref name="onComplete"/>.
    /// </summary>
    /// <param name="url">Download URL of the audio.</param>
    /// <param name="onComplete">Receives the clip, or null when download or decoding fails.</param>
    /// <param name="audioType">Decoder to use for the payload; defaults to MPEG.</param>
    /// <returns>Coroutine enumerator.</returns>
    public IEnumerator DownloadAudio(string url, Action<AudioClip> onComplete, AudioType audioType = AudioType.MPEG)
    {
        using (UnityWebRequest request = UnityWebRequestMultimedia.GetAudioClip(url, audioType))
        {
            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("❌ 下载音频失败:" + request.error);
                onComplete?.Invoke(null);
                yield break;
            }

            AudioClip clip = DownloadHandlerAudioClip.GetContent(request);
            if (clip != null)
            {
                Debug.Log("✅ 音频合成结束,等待播放");
                onComplete?.Invoke(clip);
            }
            else
            {
                Debug.LogError("❌ 无法解析音频 Clip");
                onComplete?.Invoke(null);
            }
        }
    }

    /// <summary>
    /// JSON payload returned by the authentication request.
    /// </summary>
    [Serializable]
    public class TokenResponse
    {
        public string access_token;
        public int expires_in;
    }

    /// <summary>
    /// JSON payload returned when a long-text synthesis task is created successfully.
    /// </summary>
    [Serializable]
    public class TTSTaskSuccessResponse
    {
        [JsonProperty("log_id")]
        public long LogId { get; set; }

        [JsonProperty("task_id")]
        public string TaskId { get; set; }

        [JsonProperty("task_status")]
        public string TaskStatus { get; set; } // "Running"
    }

    /// <summary>
    /// JSON payload returned when creating a long-text synthesis task fails.
    /// </summary>
    [Serializable]
    public class TTSTaskErrorResponse
    {
        [JsonProperty("error_code")]
        public int ErrorCode { get; set; }

        [JsonProperty("error_msg")]
        public string ErrorMsg { get; set; }

        [JsonProperty("log_id")]
        public long LogId { get; set; }
    }

    /// <summary>
    /// JSON payload returned by the long-text task query.
    /// </summary>
    [Serializable]
    public class TTSQueryResponse
    {
        [JsonProperty("log_id")]
        public long LogId { get; set; }

        [JsonProperty("tasks_info")]
        public List<TTSQueryTaskInfo> TasksInfo { get; set; }
    }

    /// <summary>
    /// One entry of <see cref="TTSQueryResponse.TasksInfo"/>.
    /// </summary>
    [Serializable]
    public class TTSQueryTaskInfo
    {
        [JsonProperty("task_id")]
        public string TaskId { get; set; }

        [JsonProperty("task_status")]
        public string TaskStatus { get; set; }

        [JsonProperty("task_result")]
        public TTSQueryTaskResult TaskResult { get; set; }
    }

    /// <summary>
    /// Result details of a queried task (mapped from the "task_result" JSON object).
    /// </summary>
    [Serializable]
    public class TTSQueryTaskResult
    {
        [JsonProperty("speech_url")]
        public string SpeechUrl { get; set; }

        [JsonProperty("err_no")]
        public int ErrNo { get; set; }

        [JsonProperty("err_msg")]
        public string ErrMsg { get; set; }

        [JsonProperty("sn")]
        public string Sn { get; set; }
    }

    #endregion
}