【数字人开发】Unity+百度智能云平台实现短语音文本识别功能

发布于:2025-06-20 ⋅ 阅读:(18) ⋅ 点赞:(0)

一、创建自己的语音识别应用

百度智能云控制台网址:https://console.bce.baidu.com/

1、创建应用

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

2、获取APIKey和SecretKey

在这里插入图片描述

3、API 调试

调试网址:https://console.bce.baidu.com/support/?timestamp=1750317430400#/api?product=AI&project=%E8%AF%AD%E9%9F%B3%E6%8A%80%E6%9C%AF&parent=%E9%89%B4%E6%9D%83%E8%AE%A4%E8%AF%81%E6%9C%BA%E5%88%B6&api=oauth%2F2.0%2Ftoken&method=post
在这里插入图片描述

二、在Unity中进行调用

1、短语音识别标准版相关参数

在这里插入图片描述

2、完整代码

using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using UnityEngine;
using UnityEngine.Networking;
using Newtonsoft.Json;
using UnityEngine.UI;

/// <summary>
/// Unity component that sends an <see cref="AudioClip"/> to Baidu's short-speech
/// recognition REST endpoint and shows the recognized text in a UI <see cref="Text"/>.
/// </summary>
/// <remarks>
/// On <c>Start</c> the component requests an OAuth access token and wires
/// <see cref="buttonStartASR"/> so that a click recognizes the clip currently
/// assigned to <see cref="audioSource"/>.
/// </remarks>
public class ASR : MonoBehaviour
{
    // Endpoint used to exchange apiKey/secretKey for an access token.
    private const string TokenUrl = "https://aip.baidubce.com/oauth/2.0/token";

    // Endpoint that receives the JSON recognition request.
    private const string RecognizeUrl = "https://vop.baidu.com/server_api";

    [Header("百度语音识别配置")]
    // Credentials must not live in source control: assign them in the Inspector.
    // GetAccessToken() reports an error when either value is missing.
    [SerializeField] private string apiKey = "";
    [SerializeField] private string secretKey = "";
    // Filled in by GetAccessToken(); sent as "token" with every recognition request.
    [SerializeField] private string accessToken;
    [Space]

    [Header("短语音识别标准版参数设置")]
    // The fields below are forwarded verbatim into the recognition request body.
    [SerializeField] private string format = "pcm";
    [SerializeField] private int rate = 16000;
    [SerializeField] private int channel = 1;
    [SerializeField] private string cuid = "240a906f2b88794fd0426442c4136a5a57bf5c01";
    [SerializeField] private int dev_pid = 1537;
    [Space]

    [Header("测试使用")]
    // Its clip is the audio submitted when buttonStartASR is clicked.
    public AudioSource audioSource;
    [Space]

    [Header("UI相关")]
    public Button buttonStartASR;
    // Not referenced anywhere in this class; kept so existing scene bindings stay valid.
    public Button buttonRecord;
    public Text textResult;

    /// <summary>
    /// Requests the access token and subscribes to the "start recognition" button.
    /// </summary>
    private void Start()
    {
        // Authenticate right away so the token is normally ready before the first click.
        StartCoroutine(GetAccessToken());

        if (buttonStartASR != null)
        {
            buttonStartASR.onClick.AddListener(OnStartRecognitionClicked);
        }
        else
        {
            Debug.LogWarning("buttonStartASR 未设置");
        }
    }

    /// <summary>
    /// Unsubscribes the click handler so the button does not keep calling into a
    /// destroyed component.
    /// </summary>
    private void OnDestroy()
    {
        if (buttonStartASR != null)
        {
            buttonStartASR.onClick.RemoveListener(OnStartRecognitionClicked);
        }
    }

    /// <summary>
    /// Click handler: recognizes the clip assigned to <see cref="audioSource"/> and
    /// writes the recognized text into <see cref="textResult"/>; failures are logged.
    /// </summary>
    private void OnStartRecognitionClicked()
    {
        print("开始识别");

        // A missing AudioSource is forwarded as a null clip, which RecognizeFromClip
        // reports through its error callback instead of throwing.
        AudioClip clip = audioSource != null ? audioSource.clip : null;

        StartCoroutine(RecognizeFromClip(
            clip,
            text =>
            {
                if (textResult != null)
                {
                    textResult.text = text;
                }
            },
            error => Debug.Log(error)));
    }

    #region 短语音识别相关

    /// <summary>
    /// Coroutine that converts <paramref name="clip"/> to 16-bit PCM, posts it to the
    /// recognition endpoint and reports the outcome through the callbacks.
    /// </summary>
    /// <param name="clip">Audio to recognize; must be mono and sampled at <c>rate</c> Hz.</param>
    /// <param name="onSuccess">Invoked with the recognized text when the service reports <c>err_no == 0</c>.</param>
    /// <param name="onError">Invoked with a message when the token is missing, the clip is
    /// unusable, the request fails, the response cannot be parsed, or the service returns an error code.</param>
    /// <returns>An enumerator to be run with <c>StartCoroutine</c>.</returns>
    public IEnumerator RecognizeFromClip(AudioClip clip, Action<string> onSuccess, Action<string> onError)
    {
        // Unity deserializes serialized string fields as "" rather than null, so a
        // plain null comparison would let an empty token through.
        if (string.IsNullOrEmpty(accessToken))
        {
            onError?.Invoke("accessToken未获取");
            yield break;
        }

        if (clip == null)
        {
            onError?.Invoke("音频为空");
            yield break;
        }

        // Convert the clip to raw 16-bit PCM bytes.
        byte[] pcmData = ConvertClipToPCM16(clip);
        if (pcmData == null)
        {
            onError?.Invoke("音频格式错误或转换失败");
            yield break;
        }

        string base64Audio = Convert.ToBase64String(pcmData);

        var requestData = new
        {
            format = format,
            rate = rate,
            channel = channel,
            cuid = cuid,
            token = accessToken,
            dev_pid = dev_pid,
            speech = base64Audio,
            // Byte length of the raw audio, not of the base64 text.
            len = pcmData.Length
        };

        string jsonBody = JsonConvert.SerializeObject(requestData);

        using (UnityWebRequest request = new UnityWebRequest(RecognizeUrl, "POST"))
        {
            byte[] bodyRaw = Encoding.UTF8.GetBytes(jsonBody);
            request.uploadHandler = new UploadHandlerRaw(bodyRaw);
            request.downloadHandler = new DownloadHandlerBuffer();
            request.SetRequestHeader("Content-Type", "application/json");
            request.SetRequestHeader("Accept", "application/json");

            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success)
            {
                onError?.Invoke("网络错误: " + request.error);
                yield break;
            }

            string responseText = request.downloadHandler.text;
            Debug.Log("🎤 识别结果: " + responseText);

            ASRResponse result;
            string parseError;
            if (!TryParseResponse(responseText, out result, out parseError))
            {
                onError?.Invoke("识别结果解析失败: " + parseError);
            }
            else if (result.err_no != 0)
            {
                onError?.Invoke($"识别失败(错误码{result.err_no}):{result.err_msg}");
            }
            else
            {
                // Guard against a success response that carries no "result" array.
                string text = result.result != null ? string.Join("", result.result) : string.Empty;
                onSuccess?.Invoke(text);
            }
        }
    }

    /// <summary>
    /// Deserializes a recognition response without letting parse failures escape.
    /// </summary>
    /// <param name="json">Raw response body.</param>
    /// <param name="response">The parsed response, or <c>null</c> on failure.</param>
    /// <param name="error">A description of the failure, or <c>null</c> on success.</param>
    /// <returns><c>true</c> when <paramref name="json"/> produced a non-null response object.</returns>
    private static bool TryParseResponse(string json, out ASRResponse response, out string error)
    {
        response = null;
        error = null;

        try
        {
            response = JsonConvert.DeserializeObject<ASRResponse>(json);
        }
        catch (JsonException ex)
        {
            error = ex.Message;
            return false;
        }

        if (response == null)
        {
            error = "响应为空";
            return false;
        }

        return true;
    }

    /// <summary>
    /// Converts a mono <see cref="AudioClip"/> into little-endian signed 16-bit PCM bytes.
    /// </summary>
    /// <param name="clip">Clip to convert; must have one channel and a frequency equal to <c>rate</c>.</param>
    /// <returns>The PCM bytes, or <c>null</c> when the clip is missing, has the wrong
    /// layout, or its sample data cannot be read.</returns>
    private byte[] ConvertClipToPCM16(AudioClip clip)
    {
        if (clip == null)
        {
            Debug.LogError("❌ 音频为空");
            return null;
        }

        // The audio must match the sample rate declared in the request body.
        if (clip.channels != 1 || clip.frequency != rate)
        {
            Debug.LogError($"❌ 仅支持 {rate}Hz 单通道音频");
            return null;
        }

        float[] samples = new float[clip.samples];
        if (!clip.GetData(samples, 0))
        {
            Debug.LogError("❌ 无法读取音频数据");
            return null;
        }

        byte[] pcm = new byte[samples.Length * 2]; // 16-bit = 2 bytes per sample
        for (int i = 0; i < samples.Length; i++)
        {
            // Clamp first: a sample outside [-1, 1] would otherwise overflow the short cast.
            float clamped = Mathf.Clamp(samples[i], -1f, 1f);
            short value = (short)(clamped * short.MaxValue);

            // Write the low byte first so the output is little-endian on every platform
            // and no temporary array is allocated per sample.
            pcm[i * 2] = (byte)(value & 0xFF);
            pcm[i * 2 + 1] = (byte)((value >> 8) & 0xFF);
        }
        return pcm;
    }

    /// <summary>
    /// Shape of the JSON returned by the short-speech recognition endpoint.
    /// Field names mirror the JSON keys so they deserialize without attributes.
    /// </summary>
    [Serializable]
    public class ASRResponse
    {
        public int err_no;
        public string err_msg;
        public string sn;
        public string[] result;
    }
    #endregion


    #region 鉴权相关

    /// <summary>
    /// Coroutine that exchanges <c>apiKey</c>/<c>secretKey</c> for an access token and
    /// stores it in <c>accessToken</c>. Failures are logged; the stored token is left
    /// untouched when no new token is obtained.
    /// </summary>
    /// <returns>An enumerator to be run with <c>StartCoroutine</c>.</returns>
    public IEnumerator GetAccessToken()
    {
        if (string.IsNullOrEmpty(apiKey) || string.IsNullOrEmpty(secretKey))
        {
            Debug.LogError("❌ 短语音识别未配置 apiKey 或 secretKey");
            yield break;
        }

        WWWForm form = new WWWForm();
        form.AddField("grant_type", "client_credentials");
        form.AddField("client_id", apiKey);
        form.AddField("client_secret", secretKey);

        using (UnityWebRequest request = UnityWebRequest.Post(TokenUrl, form))
        {
            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("❌ 短语音识别获取 AccessToken 失败: " + request.error);
                yield break;
            }

            try
            {
                var tokenResponse = JsonConvert.DeserializeObject<TokenResponse>(request.downloadHandler.text);
                if (tokenResponse == null || string.IsNullOrEmpty(tokenResponse.access_token))
                {
                    // A body without access_token must not be reported as success.
                    Debug.LogError("❌ 短语音识别AccessToken 解析失败: 响应中缺少 access_token");
                }
                else
                {
                    accessToken = tokenResponse.access_token;
                    // The token is a credential, so it is deliberately kept out of the log.
                    Debug.Log("✅ 短语音识别获取 AccessToken 成功");
                }
            }
            catch (JsonException ex)
            {
                Debug.LogError("❌ 短语音识别AccessToken 解析失败: " + ex.Message);
            }
        }
    }

    /// <summary>
    /// Shape of the JSON returned by the token endpoint; only the field this class reads.
    /// </summary>
    [Serializable]
    public class TokenResponse
    {
        public string access_token;
    }
    #endregion
}

3、实现效果

在这里插入图片描述


网站公告

今日签到

点亮在社区的每一天
去签到