【数字人开发】Unity+百度智能云平台实现长短文本个性化语音生成功能

发布于:2025-06-20 ⋅ 阅读:(21) ⋅ 点赞:(0)

一、创建自己的应用

百度智能云控制台网址:https://console.bce.baidu.com/

1、创建应用

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

2、获取 API Key 和 Secret Key

在这里插入图片描述

3、API 调试

调试网址:https://console.bce.baidu.com/support/?timestamp=1750317430400#/api?product=AI&project=%E8%AF%AD%E9%9F%B3%E6%8A%80%E6%9C%AF&parent=%E9%89%B4%E6%9D%83%E8%AE%A4%E8%AF%81%E6%9C%BA%E5%88%B6&api=oauth%2F2.0%2Ftoken&method=post
在这里插入图片描述

二、在Unity中进行调用

1、相关参数说明

(1)短文本个性化语音生成相关参数

在这里插入图片描述

(2)长文本个性化语音生成相关参数

在这里插入图片描述

2、完整代码

using Newtonsoft.Json;
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
using UnityEngine;
using UnityEngine.Networking;
using UnityEngine.UI;
public class TTS : MonoBehaviour
{
    #region 相关参数
    // Credentials sent as client_id / client_secret by GetAccessToken.
    // NOTE(review): these defaults look like real keys committed to source — consider
    // supplying them through the Inspector or external configuration and rotating them.
    [Header("鉴权相关参数")]
    [SerializeField] private string apiKey = "LFfK6DTaswy6LLtBqvHO86w0";
    [SerializeField] private string secretKey = "vj6JmKd7zBylDVGW2WmTNPWl9eKxxZEL";
    // Written by GetAccessToken after a successful token request; null until then.
    [SerializeField] private string accessToken = null;

    // Values below are serialized into the JSON body built by CreateTTSTask.
    [Space]
    [Header("长文本语音合成参数设置")]
    [SerializeField] private string format = "mp3-16k"; // or wav
    [SerializeField] private int voice = 0; // speaker: 0 = female, 1 = male, etc.
    [SerializeField] private string lang = "zh";
    [SerializeField] private int speed = 5; // 0~15
    [SerializeField] private int pitch = 5; // 0~15
    [SerializeField] private int volume = 5; // 0~15
    [SerializeField] private int enable_subtitle = 0;

    // Values below are appended as URL query parameters by ShortTTS.
    [Space]
    [Header("短文本语音合成参数设置")]
    [SerializeField] private string cuid = "240a906f2b88794fd0426442c4136a5a57bf5c01";
    [SerializeField] private string ctp = "1";
    [SerializeField] private string lan = "zh";
    [SerializeField] private string spd = "5";
    [SerializeField] private string pit = "5";
    [SerializeField] private string vol = "10";
    [SerializeField] private string per = "1";
    [SerializeField] private string aue = "3";

    // UI references used by Start to wire up the synthesis and playback buttons.
    [Space]
    [Header("UI界面相关")]
    public InputField inputFieldText;
    public Button buttonStartTTS;// button that starts synthesis
    public Button buttonPlay;// button that plays the synthesized audio
    public AudioSource audioSource;// component used to play the audio
    #endregion

    /// <summary>
    /// Unity lifecycle hook. Starts authentication immediately and wires the
    /// "synthesize" and "play" buttons to their handlers.
    /// </summary>
    void Start()
    {
        // Fetch the access token up front so it is available for later synthesis requests.
        StartCoroutine(GetAccessToken());

        buttonStartTTS.onClick.AddListener(HandleSynthesizeClicked);
        buttonPlay.onClick.AddListener(HandlePlayClicked);
    }

    /// <summary>
    /// Click handler: synthesizes the text currently typed into the input field.
    /// </summary>
    private void HandleSynthesizeClicked()
    {
        StartTTS(inputFieldText.text, audioSource);
    }

    /// <summary>
    /// Click handler: plays the clip assigned to the audio source, if there is one.
    /// </summary>
    private void HandlePlayClicked()
    {
        if (audioSource.clip == null)
        {
            return;
        }

        audioSource.Play();
    }

    /// <summary>
    /// Synthesizes speech for <paramref name="text"/> and assigns the resulting clip to
    /// <paramref name="audioSource"/>. Text shorter than 60 characters is sent to the
    /// short-text endpoint; anything else goes through the long-text task flow.
    /// </summary>
    /// <param name="text">Text to synthesize. Null, empty or whitespace-only input is ignored.</param>
    /// <param name="audioSource">Audio source that receives the synthesized clip.</param>
    public void StartTTS(string text,AudioSource audioSource)
    {
        // Threshold used by this class to choose between the two synthesis flows.
        const int shortTextLimit = 60;

        if (string.IsNullOrWhiteSpace(text))
        {
            Debug.LogWarning("[TTS] 合成文本为空,已忽略本次请求");
            return;
        }

        if (audioSource == null)
        {
            Debug.LogError("[TTS] AudioSource 未指定,无法保存合成结果");
            return;
        }

        if (text.Length < shortTextLimit)
        {
            print("开始短文本语音合成");
            StartCoroutine(ShortTTS(text, response => {
                // ShortTTS also invokes the callback on failure (clip is null then);
                // do not overwrite the current clip or claim success in that case.
                if (response == null || response.clip == null)
                {
                    Debug.LogError("[TTS] 短文本语音合成失败: " + response?.error_msg);
                    return;
                }

                audioSource.clip = response.clip;
                print("短文本语音合成结束,请播放");
            }));
        }
        else
        {
            print("开始长文本语音合成");
            StartCoroutine(LongTTS(text, clip=> {
                audioSource.clip = clip;
                print("长文本语音合成结束,请播放");
            }));
        }
    }

    #region 鉴权相关
    /// <summary>
    /// Requests an OAuth access token (client_credentials grant) using <c>apiKey</c> and
    /// <c>secretKey</c>, and stores it in <c>accessToken</c> on success.
    /// On any failure an error is logged and <c>accessToken</c> is left unchanged.
    /// </summary>
    /// <returns>Coroutine enumerator that completes once the token request has finished.</returns>
    public IEnumerator GetAccessToken()
    {
        const string url = "https://aip.baidubce.com/oauth/2.0/token";

        if (string.IsNullOrEmpty(apiKey) || string.IsNullOrEmpty(secretKey))
        {
            Debug.LogError("❌ 获取 语音合成AccessToken 失败: apiKey 或 secretKey 为空");
            yield break;
        }

        WWWForm form = new WWWForm();
        form.AddField("grant_type", "client_credentials");
        form.AddField("client_id", apiKey);
        form.AddField("client_secret", secretKey);

        // The using block disposes the request and its handlers when the coroutine leaves it.
        using (UnityWebRequest request = UnityWebRequest.Post(url, form))
        {
            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("❌ 获取 语音合成AccessToken 失败: " + request.error);
                yield break;
            }

            // Parse without yielding inside the try/catch; failures are reported afterwards.
            TokenResponse tokenResponse = null;
            string parseError = null;
            try
            {
                tokenResponse = JsonConvert.DeserializeObject<TokenResponse>(request.downloadHandler.text);
            }
            catch (Exception ex)
            {
                parseError = ex.Message;
            }

            if (parseError != null)
            {
                Debug.LogError("❌ 语音合成AccessToken 解析失败: " + parseError);
                yield break;
            }

            // A reply that parses but carries no token must not be treated as success.
            if (tokenResponse == null || string.IsNullOrEmpty(tokenResponse.access_token))
            {
                Debug.LogError("❌ 语音合成AccessToken 解析失败: 返回内容中没有 access_token");
                yield break;
            }

            accessToken = tokenResponse.access_token;
            // The token is a credential, so its value is intentionally kept out of the log.
            Debug.Log("✅ 获取语音合成 AccessToken 成功");
        }
    }
    #endregion


    #region 短文本语音合成相关

    /// <summary>
    /// Requests short-text speech synthesis and passes the outcome to <paramref name="callback"/>.
    /// On success the response carries the decoded clip. On failure <c>clip</c> is null,
    /// <c>error_msg</c> holds the reason and <c>error_index</c> is -1 (request failed),
    /// -2 (the service answered with non-audio content) or -3 (missing text or access token).
    /// </summary>
    /// <param name="text">Text to synthesize.</param>
    /// <param name="callback">Receives the synthesis result; invoked exactly once.</param>
    /// <returns>Coroutine enumerator that completes once the request has finished.</returns>
    public IEnumerator ShortTTS(string text, Action<TtsResponse> callback)
    {
        if (string.IsNullOrEmpty(text))
        {
            Debug.LogError("[TTS] 请求失败: 合成文本为空");
            callback?.Invoke(new TtsResponse { error_index = -3, error_msg = "合成文本为空" });
            yield break;
        }

        // The token is only available after GetAccessToken has completed.
        if (string.IsNullOrEmpty(accessToken))
        {
            Debug.LogError("[TTS] 请求失败: AccessToken 为空,请确认鉴权已完成");
            callback?.Invoke(new TtsResponse { error_index = -3, error_msg = "AccessToken 为空" });
            yield break;
        }

        var param = new Dictionary<string, string>
        {
            { "tex", text },
            { "tok", accessToken },
            { "cuid", cuid },
            { "ctp", ctp },
            { "lan", lan },
            { "spd", spd },
            { "pit", pit },
            { "vol", vol },
            { "per", per },
            { "aue", aue }
        };

        // HTTPS keeps the access token off the wire in clear text; recent Unity versions
        // also refuse plain-HTTP requests unless explicitly allowed in Player settings.
        var urlBuilder = new StringBuilder("https://tsn.baidu.com/text2audio");
        char separator = '?';
        foreach (var p in param)
        {
            urlBuilder.Append(separator)
                      .Append(p.Key)
                      .Append('=')
                      .Append(UnityWebRequest.EscapeURL(p.Value));
            separator = '&';
        }

        // aue "6" asks the service for WAV; the other supported value handled here is MP3.
        AudioType audioType = aue == "6" ? AudioType.WAV : AudioType.MPEG;

        using (UnityWebRequest www = UnityWebRequestMultimedia.GetAudioClip(urlBuilder.ToString(), audioType))
        {
            yield return www.SendWebRequest();

            if (www.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("[TTS] 请求失败: " + www.error);
                callback?.Invoke(new TtsResponse
                {
                    error_index = -1,
                    error_msg = www.error
                });
                yield break;
            }

            // The service reports errors with a successful HTTP status and a non-audio
            // body, so the Content-Type header decides how the payload is interpreted.
            string type = www.GetResponseHeader("Content-Type");
            if (!string.IsNullOrEmpty(type) && type.Contains("audio"))
            {
                AudioClip clip = DownloadHandlerAudioClip.GetContent(www);
                callback?.Invoke(new TtsResponse { clip = clip });
                yield break;
            }

            byte[] raw = www.downloadHandler.data;
            string errorText = raw != null ? Encoding.UTF8.GetString(raw) : string.Empty;
            Debug.LogError("[TTS] 文本响应错误: " + errorText);
            callback?.Invoke(new TtsResponse
            {
                error_index = -2,
                error_msg = errorText
            });
        }
    }

    /// <summary>
    /// Outcome of a short-text synthesis request, as handed to the ShortTTS callback.
    /// </summary>
    public class TtsResponse
    {
        /// <summary>Decoded audio; stays null when the response describes a failure.</summary>
        public AudioClip clip;

        /// <summary>0 when no error was recorded; ShortTTS sets a negative value on failure.</summary>
        public int error_index;

        /// <summary>Human-readable failure reason; null when no error was recorded.</summary>
        public string error_msg;

        // Not populated by the code in this file; presumably mirrors fields of the
        // service's error payload — TODO confirm.
        public string sn;
        public int idx;

        /// <summary>True when <see cref="error_index"/> is 0.</summary>
        public bool Success
        {
            get { return error_index == 0; }
        }
    }
    #endregion

    #region 长文本语音合成相关

    /// <summary>
    /// Runs the long-text synthesis flow in order: create the task, poll until it
    /// finishes, then download the audio. <paramref name="callback"/> is invoked only
    /// when a clip was obtained; every failure is logged and ends the coroutine.
    /// </summary>
    /// <param name="text">Text to synthesize.</param>
    /// <param name="callback">Receives the downloaded clip on success.</param>
    /// <returns>Coroutine enumerator that completes once the flow has finished.</returns>
    IEnumerator LongTTS(String text, Action<AudioClip> callback)
    {
        // Check the token before sending anything: every step below needs it, and a
        // create request without it is guaranteed to fail.
        if (string.IsNullOrEmpty(accessToken))
        {
            Debug.LogError("❌ 合成失败: AccessToken 为空,请确认鉴权已完成");
            yield break;
        }

        // Step 1: create the synthesis task and capture its id.
        string taskId = null;
        yield return StartCoroutine(CreateTTSTask(text,
            createdId => { taskId = createdId; },
            errorMsg => { Debug.LogError("❌ 合成失败: " + errorMsg); }));

        if (string.IsNullOrEmpty(taskId))
        {
            yield break;
        }

        // Step 2: poll the task until it reports a download address or an error.
        string audioUrl = null;
        yield return StartCoroutine(QueryTTSTaskStatus(accessToken, taskId,
            audioAddress => { audioUrl = audioAddress; },
            errorMsg => { Debug.LogError("❌ 查询失败:" + errorMsg); }));

        if (string.IsNullOrEmpty(audioUrl))
        {
            yield break;
        }

        // Step 3: download the audio and hand the clip to the caller.
        yield return StartCoroutine(DownloadAudio(audioUrl, clip =>
        {
            if (clip == null)
            {
                Debug.LogError("下载的音频 Clip 为 null");
                return;
            }

            callback?.Invoke(clip);
        }));
    }

    /// <summary>
    /// Creates a long-text synthesis task from <paramref name="text"/> and the
    /// long-text settings of this component.
    /// </summary>
    /// <param name="text">Text to synthesize.</param>
    /// <param name="onSuccess">Receives the id of the created task.</param>
    /// <param name="onError">Receives a description of the failure.</param>
    /// <returns>Coroutine enumerator that completes once the request has finished.</returns>
    public IEnumerator CreateTTSTask(string text, Action<string> onSuccess, Action<string> onError)
    {
        if (string.IsNullOrEmpty(text))
        {
            onError?.Invoke("合成文本为空");
            yield break;
        }

        if (string.IsNullOrEmpty(accessToken))
        {
            onError?.Invoke("AccessToken 为空,请确认鉴权已完成");
            yield break;
        }

        string url = $"https://aip.baidubce.com/rpc/2.0/tts/v1/create?access_token={UnityWebRequest.EscapeURL(accessToken)}";

        var bodyObj = new
        {
            text = text,
            format = format,
            voice = voice,
            lang = lang,
            speed = speed,
            pitch = pitch,
            volume = volume,
            enable_subtitle = enable_subtitle
        };
        byte[] bodyRaw = Encoding.UTF8.GetBytes(JsonConvert.SerializeObject(bodyObj));

        using (UnityWebRequest request = new UnityWebRequest(url, "POST"))
        {
            request.uploadHandler = new UploadHandlerRaw(bodyRaw);
            request.downloadHandler = new DownloadHandlerBuffer();
            request.SetRequestHeader("Content-Type", "application/json");
            request.SetRequestHeader("Accept", "application/json");

            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("❌ 网络请求失败:" + request.error);
                onError?.Invoke(request.error);
                yield break;
            }

            string responseText = request.downloadHandler.text;
            Debug.Log("✅ 创建语音任务返回:" + responseText);

            // Decide by the parsed fields rather than by substring matching, and keep the
            // callbacks outside the try block so their own exceptions are not swallowed.
            string taskId = null;
            string failure = null;
            try
            {
                var created = JsonConvert.DeserializeObject<TTSTaskSuccessResponse>(responseText);
                if (created != null && !string.IsNullOrEmpty(created.TaskId))
                {
                    taskId = created.TaskId;
                }
                else
                {
                    var error = JsonConvert.DeserializeObject<TTSTaskErrorResponse>(responseText);
                    if (error != null && (error.ErrorCode != 0 || !string.IsNullOrEmpty(error.ErrorMsg)))
                    {
                        failure = error.ErrorMsg ?? ("error_code=" + error.ErrorCode);
                    }
                    else
                    {
                        failure = "无法识别的返回内容";
                    }
                }
            }
            catch (JsonException ex)
            {
                failure = "返回内容解析失败: " + ex.Message;
            }

            if (taskId != null)
            {
                onSuccess?.Invoke(taskId);
            }
            else
            {
                onError?.Invoke(failure);
            }
        }
    }

    /// <summary>
    /// Polls the status of a long-text synthesis task every two seconds until it
    /// succeeds, fails, reports an unknown status, or the timeout elapses.
    /// Exactly one of the callbacks is invoked before the coroutine ends.
    /// </summary>
    /// <param name="accessToken">Access token used to authorize the query.</param>
    /// <param name="taskId">Id of the synthesis task to query.</param>
    /// <param name="onSuccess">Receives the download address of the synthesized audio.</param>
    /// <param name="onError">Receives a description of the failure.</param>
    /// <param name="timeoutSeconds">
    /// Maximum real time to keep polling while the task is still running.
    /// Zero or a negative value disables the limit.
    /// </param>
    /// <returns>Coroutine enumerator that completes once polling has ended.</returns>
    public IEnumerator QueryTTSTaskStatus(string accessToken, string taskId, Action<string> onSuccess, Action<string> onError, float timeoutSeconds = 600f)
    {
        if (string.IsNullOrEmpty(accessToken) || string.IsNullOrEmpty(taskId))
        {
            onError?.Invoke("accessToken 或 taskId 为空");
            yield break;
        }

        const float delaySeconds = 2f;
        string url = $"https://aip.baidubce.com/rpc/2.0/tts/v1/query?access_token={UnityWebRequest.EscapeURL(accessToken)}";

        // The request body never changes between polls, so encode it once.
        byte[] payload = Encoding.UTF8.GetBytes(
            JsonConvert.SerializeObject(new { task_ids = new string[] { taskId } }));

        // Real time is used so the limit does not depend on Time.timeScale.
        float deadline = Time.realtimeSinceStartup + timeoutSeconds;

        while (true)
        {
            bool requestSucceeded;
            string responseJson = null;
            string requestError = null;

            using (UnityWebRequest request = new UnityWebRequest(url, "POST"))
            {
                request.uploadHandler = new UploadHandlerRaw(payload);
                request.downloadHandler = new DownloadHandlerBuffer();
                request.SetRequestHeader("Content-Type", "application/json");
                request.SetRequestHeader("Accept", "application/json");

                yield return request.SendWebRequest();

                requestSucceeded = request.result == UnityWebRequest.Result.Success;
                if (requestSucceeded)
                {
                    responseJson = request.downloadHandler.text;
                }
                else
                {
                    requestError = request.error;
                }
            }

            if (!requestSucceeded)
            {
                onError?.Invoke("网络错误:" + requestError);
                yield break;
            }

            // Parse outside of any yield; a malformed body is reported through onError
            // instead of terminating the coroutine with an unhandled exception.
            TTSQueryResponse root = null;
            string parseError = null;
            try
            {
                root = JsonConvert.DeserializeObject<TTSQueryResponse>(responseJson);
            }
            catch (JsonException ex)
            {
                parseError = ex.Message;
            }

            if (parseError != null)
            {
                onError?.Invoke("查询结果解析失败:" + parseError);
                yield break;
            }

            if (root == null || root.TasksInfo == null || root.TasksInfo.Count == 0)
            {
                onError?.Invoke("未找到任务信息");
                yield break;
            }

            var task = root.TasksInfo[0];
            switch (task?.TaskStatus)
            {
                case "Success":
                    if (!string.IsNullOrEmpty(task.TaskResult?.SpeechUrl))
                    {
                        onSuccess?.Invoke(task.TaskResult.SpeechUrl);
                    }
                    else
                    {
                        onError?.Invoke("合成成功但未返回语音地址");
                    }
                    yield break;

                case "Failure":
                    onError?.Invoke(task.TaskResult?.ErrMsg ?? "未知错误");
                    yield break;

                case "Running":
                    if (timeoutSeconds > 0f && Time.realtimeSinceStartup >= deadline)
                    {
                        onError?.Invoke("查询超时:任务仍在合成中");
                        yield break;
                    }

                    Debug.Log("🎙 正在合成...");
                    yield return new WaitForSeconds(delaySeconds);
                    break; // leave the switch and poll again

                default:
                    onError?.Invoke("未知状态:" + task?.TaskStatus);
                    yield break;
            }
        }
    }

    /// <summary>
    /// Downloads the audio at <paramref name="url"/> and decodes it into an AudioClip.
    /// </summary>
    /// <param name="url">Download address of the audio.</param>
    /// <param name="onComplete">Receives the decoded clip, or null when the download or decoding failed.</param>
    /// <returns>Coroutine enumerator that completes once the download has finished.</returns>
    public IEnumerator DownloadAudio(string url, Action<AudioClip> onComplete)
    {
        if (string.IsNullOrEmpty(url))
        {
            Debug.LogError("❌ 下载音频失败:下载地址为空");
            onComplete?.Invoke(null);
            yield break;
        }

        AudioClip clip = null;
        using (UnityWebRequest request = UnityWebRequestMultimedia.GetAudioClip(url, ResolveDownloadAudioType(url)))
        {
            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("❌ 下载音频失败:" + request.error);
            }
            else
            {
                clip = DownloadHandlerAudioClip.GetContent(request);
                if (clip != null)
                {
                    Debug.Log("✅ 音频合成结束,等待播放");
                }
                else
                {
                    Debug.LogError("❌ 无法解析音频 Clip");
                }
            }
        }

        onComplete?.Invoke(clip);
    }

    /// <summary>
    /// Chooses the decoder for a downloaded file: the file extension in the URL path
    /// wins; otherwise the configured long-text <c>format</c> decides (wav → WAV, else MPEG).
    /// </summary>
    private AudioType ResolveDownloadAudioType(string url)
    {
        Uri parsed;
        if (Uri.TryCreate(url, UriKind.Absolute, out parsed))
        {
            string path = parsed.AbsolutePath;
            if (path.EndsWith(".wav", StringComparison.OrdinalIgnoreCase))
            {
                return AudioType.WAV;
            }
            if (path.EndsWith(".mp3", StringComparison.OrdinalIgnoreCase))
            {
                return AudioType.MPEG;
            }
        }

        bool wavConfigured = format != null && format.StartsWith("wav", StringComparison.OrdinalIgnoreCase);
        return wavConfigured ? AudioType.WAV : AudioType.MPEG;
    }

    /// <summary>
    /// JSON shape of the authentication response; GetAccessToken reads
    /// <see cref="access_token"/> from it.
    /// </summary>
    [Serializable]
    public class TokenResponse
    {
        /// <summary>Mapped from the "expires_in" member of the response.</summary>
        public int expires_in;

        /// <summary>Mapped from the "access_token" member of the response.</summary>
        public string access_token;
    }

    /// <summary>
    /// JSON shape returned when a long-text synthesis task was created successfully.
    /// </summary>
    [Serializable]
    public class TTSTaskSuccessResponse
    {
        /// <summary>Id of the created task; CreateTTSTask passes it to its success callback.</summary>
        [JsonProperty("task_id")]
        public string TaskId { get; set; }

        /// <summary>Status reported at creation time, e.g. "Running".</summary>
        [JsonProperty("task_status")]
        public string TaskStatus { get; set; }

        /// <summary>Mapped from the "log_id" member of the response.</summary>
        [JsonProperty("log_id")]
        public long LogId { get; set; }
    }

    /// <summary>
    /// JSON shape returned when creating a long-text synthesis task failed.
    /// </summary>
    [Serializable]
    public class TTSTaskErrorResponse
    {
        /// <summary>Message describing the failure; CreateTTSTask passes it to its error callback.</summary>
        [JsonProperty("error_msg")]
        public string ErrorMsg { get; set; }

        /// <summary>Mapped from the "error_code" member of the response.</summary>
        [JsonProperty("error_code")]
        public int ErrorCode { get; set; }

        /// <summary>Mapped from the "log_id" member of the response.</summary>
        [JsonProperty("log_id")]
        public long LogId { get; set; }
    }

    /// <summary>
    /// JSON shape of the reply to a task status query.
    /// </summary>
    [Serializable]
    public class TTSQueryResponse
    {
        /// <summary>Entries for the queried tasks; QueryTTSTaskStatus reads the first one.</summary>
        [JsonProperty("tasks_info")]
        public List<TTSQueryTaskInfo> TasksInfo { get; set; }

        /// <summary>Mapped from the "log_id" member of the response.</summary>
        [JsonProperty("log_id")]
        public long LogId { get; set; }
    }

    /// <summary>
    /// One entry of "tasks_info" in a task status query reply.
    /// </summary>
    [Serializable]
    public class TTSQueryTaskInfo
    {
        /// <summary>
        /// Task state; QueryTTSTaskStatus handles "Success", "Failure" and "Running"
        /// and treats any other value as an error.
        /// </summary>
        [JsonProperty("task_status")]
        public string TaskStatus { get; set; }

        /// <summary>Result details; read for the audio address and the error message.</summary>
        [JsonProperty("task_result")]
        public TTSQueryTaskResult TaskResult { get; set; }

        /// <summary>Mapped from the "task_id" member of the entry.</summary>
        [JsonProperty("task_id")]
        public string TaskId { get; set; }
    }

    /// <summary>
    /// "task_result" payload of a queried synthesis task.
    /// </summary>
    [Serializable]
    public class TTSQueryTaskResult
    {
        /// <summary>Download address of the synthesized audio; read when the task succeeded.</summary>
        [JsonProperty("speech_url")]
        public string SpeechUrl { get; set; }

        /// <summary>Error message; read when the task failed.</summary>
        [JsonProperty("err_msg")]
        public string ErrMsg { get; set; }

        /// <summary>Mapped from the "err_no" member of the payload.</summary>
        [JsonProperty("err_no")]
        public int ErrNo { get; set; }

        /// <summary>Mapped from the "sn" member of the payload.</summary>
        [JsonProperty("sn")]
        public string Sn { get; set; }
    }
    #endregion

}

3、最终效果

在这里插入图片描述


网站公告

今日签到

点亮在社区的每一天
去签到