

using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Threading.Tasks;
using UnityEngine;
using UnityEngine.Networking;
using System.IO;
using System.Net.WebSockets;
using System.Threading;
using LitJson;
/// <summary>
/// Scene-persistent singleton that converts text to an <see cref="AudioClip"/> through the
/// configured speech-synthesis service, either with a single HTTP request
/// (<see cref="TextToSpeechHTTP"/>) or over a binary WebSocket (<see cref="TextToSpeechWebSocket"/>).
/// Every request reports its outcome through the optional per-call callback (null on failure)
/// and through the <see cref="OnAudioReceived"/> / <see cref="OnError"/> events.
/// </summary>
public class TTSManager : MonoBehaviour
{
    // Value of TTSResponse.code that the HTTP endpoint returns on success.
    private const int SuccessCode = 3000;

    // Size of the scratch buffer handed to ClientWebSocket.ReceiveAsync. A WebSocket message
    // may be larger than this; it is then delivered in several pieces and reassembled.
    private const int ReceiveBufferSize = 4096;

    // Binary frame layout used on the WebSocket endpoint, per the provider's protocol
    // documentation (NOTE(review): verify against the current spec when upgrading):
    //   byte 0: protocol version (high nibble) | header size in 4-byte units (low nibble)
    //   byte 1: message type (high nibble)     | message-type-specific flags (low nibble)
    //   byte 2: serialization (high nibble)    | compression (low nibble)
    //   byte 3: reserved
    private const int MessageTypeAudioOnly = 0x0B;
    private const int MessageTypeError = 0x0F;
    private const int CompressionGzip = 0x01;

    [SerializeField] private string appId = "your_app_id";
    [SerializeField] private string secretKey = "your_secret_key";
    [SerializeField] private string accessToken = "your_access_token";
    [SerializeField] private string voiceType = "zh_male_M392_conversation_wvae_bigtts";
    [SerializeField] private string encoding = "mp3";
    [SerializeField] private float speedRatio = 1.0f;
    [SerializeField] private string httpApiUrl = "https://openspeech.bytedance.com/api/v1/tts";
    [SerializeField] private string websocketApiUrl = "wss://openspeech.bytedance.com/api/v1/tts/ws_binary";
    [SerializeField] private AuthType authType = AuthType.BearerToken;

    /// <summary>How requests authenticate against the service.</summary>
    public enum AuthType
    {
        BearerToken,
        ApiKey,
        BasicAuth
    }

    // Classification of one complete binary frame received from the server.
    private enum ServerFrameKind
    {
        Ignored,
        Audio,
        FinalAudio,
        Error
    }

    /// <summary>Signature of <see cref="OnAudioReceived"/> handlers.</summary>
    public delegate void AudioReceivedCallback(AudioClip audioClip);

    /// <summary>Raised with the decoded clip whenever a request succeeds.</summary>
    public event AudioReceivedCallback OnAudioReceived;

    /// <summary>Signature of <see cref="OnError"/> handlers.</summary>
    public delegate void ErrorCallback(string errorMessage);

    /// <summary>Raised with a human-readable message whenever a request fails.</summary>
    public event ErrorCallback OnError;

    private static TTSManager instance;

    /// <summary>
    /// The shared manager. If none is registered yet, the scene is searched for one and,
    /// failing that, a new "TTSManager" GameObject is created.
    /// </summary>
    public static TTSManager Instance
    {
        get
        {
            if (instance == null)
            {
                instance = FindObjectOfType<TTSManager>();
                if (instance == null)
                {
                    GameObject obj = new GameObject("TTSManager");
                    instance = obj.AddComponent<TTSManager>();
                }
            }
            return instance;
        }
    }

    // Enforces the singleton: duplicates destroy themselves, the first instance survives scene loads.
    private void Awake()
    {
        if (instance != null && instance != this)
        {
            Destroy(gameObject);
        }
        else
        {
            instance = this;
            DontDestroyOnLoad(gameObject);
        }
    }

    // Drops the static reference so a destroyed manager is never handed out again.
    private void OnDestroy()
    {
        if (instance == this)
        {
            instance = null;
        }
    }

    /// <summary>
    /// Coroutine that synthesizes <paramref name="text"/> with one HTTP POST and decodes the
    /// returned base64 audio into an <see cref="AudioClip"/>.
    /// </summary>
    /// <param name="text">Text to synthesize; must not be null or empty.</param>
    /// <param name="callback">Optional; receives the clip on success or null on any failure.</param>
    /// <returns>An enumerator intended to be run with <c>StartCoroutine</c>.</returns>
    public IEnumerator TextToSpeechHTTP(string text, Action<AudioClip> callback = null)
    {
        if (!ValidateRequest(text))
        {
            ReportFailure("参数验证失败,请检查配置", callback);
            yield break;
        }

        string reqId = Guid.NewGuid().ToString();
        string jsonData = BuildRequestJson(text, reqId, "query");
        // The request body contains the access token, so only the request id is logged.
        Debug.Log($"发送TTS HTTP请求, reqid: {reqId}");

        byte[] audioBytes;
        using (UnityWebRequest www = new UnityWebRequest(httpApiUrl, "POST"))
        {
            www.uploadHandler = new UploadHandlerRaw(Encoding.UTF8.GetBytes(jsonData));
            www.downloadHandler = new DownloadHandlerBuffer();
            www.SetRequestHeader("Content-Type", "application/json");
            SetAuthenticationHeader(www);
            yield return www.SendWebRequest();

            // Anything other than Success (including DataProcessingError) is a failure.
            if (www.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError($"HTTP错误: {www.error}");
                Debug.LogError($"响应文本: {www.downloadHandler.text}");
                ReportFailure(www.error, callback);
                yield break;
            }

            string responseText = www.downloadHandler.text;
            // The body carries the whole clip as base64; logging it verbatim floods the console.
            Debug.Log($"收到TTS响应, 长度: {(responseText == null ? 0 : responseText.Length)}");

            if (!TryDecodeHttpResponse(responseText, out audioBytes, out string apiError))
            {
                Debug.LogError($"API错误: {apiError}");
                ReportFailure(apiError, callback);
                yield break;
            }
        }

        if (!TryWriteTempAudioFile(reqId, audioBytes, out string tempPath, out string writeError))
        {
            Debug.LogError($"音频加载错误: {writeError}");
            ReportFailure(writeError, callback);
            yield break;
        }

        try
        {
            using (UnityWebRequest audioRequest = UnityWebRequestMultimedia.GetAudioClip($"file://{tempPath}", GetAudioType(encoding)))
            {
                yield return audioRequest.SendWebRequest();
                DeliverClip(audioRequest, callback);
            }
        }
        finally
        {
            // The clip is fully loaded at this point; do not let temp files pile up.
            TryDeleteFile(tempPath);
        }
    }

    /// <summary>
    /// Synthesizes <paramref name="text"/> over the binary WebSocket endpoint, collecting the
    /// streamed audio frames into one clip.
    /// </summary>
    /// <param name="text">Text to synthesize; must not be null or empty.</param>
    /// <param name="callback">Optional; receives the clip on success or null on any failure.</param>
    /// <returns>A task that completes once the outcome has been reported. Failures are
    /// reported through <see cref="OnError"/> and the callback rather than thrown.</returns>
    public async Task TextToSpeechWebSocket(string text, Action<AudioClip> callback = null)
    {
        if (!ValidateRequest(text))
        {
            ReportFailure("参数验证失败,请检查配置", callback);
            return;
        }

        string reqId = Guid.NewGuid().ToString();
        ClientWebSocket client = new ClientWebSocket();
        try
        {
            SetWebSocketAuthentication(client);
            await client.ConnectAsync(new Uri(websocketApiUrl), CancellationToken.None);

            byte[] request = BuildWebSocketRequest(BuildRequestJson(text, reqId, "submit"));
            await client.SendAsync(new ArraySegment<byte>(request), WebSocketMessageType.Binary, true, CancellationToken.None);

            byte[] audioBytes;
            bool isComplete = false;
            bool serverFailed = false;
            using (var audioData = new MemoryStream())
            using (var frame = new MemoryStream())
            {
                byte[] buffer = new byte[ReceiveBufferSize];
                while (!isComplete && !serverFailed && client.State == WebSocketState.Open)
                {
                    WebSocketReceiveResult result = await client.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
                    if (result.MessageType == WebSocketMessageType.Close)
                    {
                        // A server-initiated close ends the stream; the close handshake is
                        // completed in the finally block.
                        isComplete = true;
                        break;
                    }

                    // One protocol frame can span several ReceiveAsync calls; only the first
                    // piece starts with the header, so parse after the whole message arrived.
                    frame.Write(buffer, 0, result.Count);
                    if (!result.EndOfMessage)
                    {
                        continue;
                    }

                    byte[] message = frame.ToArray();
                    frame.SetLength(0);
                    if (result.MessageType != WebSocketMessageType.Binary)
                    {
                        continue;
                    }

                    switch (ParseServerFrame(message, out ArraySegment<byte> audioChunk, out string serverError))
                    {
                        case ServerFrameKind.Audio:
                            audioData.Write(audioChunk.Array, audioChunk.Offset, audioChunk.Count);
                            break;
                        case ServerFrameKind.FinalAudio:
                            audioData.Write(audioChunk.Array, audioChunk.Offset, audioChunk.Count);
                            isComplete = true;
                            break;
                        case ServerFrameKind.Error:
                            Debug.LogError($"WebSocket错误: {serverError}");
                            ReportFailure(serverError, callback);
                            serverFailed = true;
                            break;
                    }
                }
                audioBytes = audioData.ToArray();
            }

            if (serverFailed)
            {
                return;
            }

            if (!isComplete || audioBytes.Length == 0)
            {
                // Without this the caller would wait forever for a callback that never comes.
                const string incomplete = "未收到完整的音频数据";
                Debug.LogError($"WebSocket错误: {incomplete}");
                ReportFailure(incomplete, callback);
                return;
            }

            if (!TryWriteTempAudioFile(reqId, audioBytes, out string tempPath, out string writeError))
            {
                Debug.LogError($"音频加载错误: {writeError}");
                ReportFailure(writeError, callback);
                return;
            }

            try
            {
                using (UnityWebRequest audioRequest = UnityWebRequestMultimedia.GetAudioClip($"file://{tempPath}", GetAudioType(encoding)))
                {
                    // Bridge Unity's completion event to a Task so it can be awaited here.
                    var loaded = new TaskCompletionSource<bool>();
                    audioRequest.SendWebRequest().completed += _ => loaded.TrySetResult(true);
                    await loaded.Task;
                    DeliverClip(audioRequest, callback);
                }
            }
            finally
            {
                TryDeleteFile(tempPath);
            }
        }
        catch (Exception ex)
        {
            Debug.LogError($"WebSocket异常: {ex.Message}");
            ReportFailure(ex.Message, callback);
        }
        finally
        {
            await CloseQuietlyAsync(client);
            client.Dispose();
        }
    }

    // Raises OnError and tells the per-call callback that no clip is coming.
    private void ReportFailure(string message, Action<AudioClip> callback)
    {
        OnError?.Invoke(message);
        callback?.Invoke(null);
    }

    // Checks the inspector configuration plus the text of this particular request.
    private bool ValidateRequest(string text)
    {
        if (!ValidateParameters())
        {
            return false;
        }
        if (string.IsNullOrEmpty(text))
        {
            Debug.LogError("text不能为空");
            return false;
        }
        return true;
    }

    // Serializes the request body shared by both transports; only "operation" differs
    // ("query" for HTTP, "submit" for WebSocket).
    private string BuildRequestJson(string text, string reqId, string operation)
    {
        var requestData = new Dictionary<string, object>
        {
            ["app"] = new Dictionary<string, object>
            {
                ["appid"] = appId,
                ["token"] = accessToken,
                ["cluster"] = "volcano_tts"
            },
            ["user"] = new Dictionary<string, object>
            {
                ["uid"] = "unity_user"
            },
            ["audio"] = new Dictionary<string, object>
            {
                ["voice_type"] = voiceType,
                ["encoding"] = encoding,
                // Sent as a rounded double: LitJson builds differ in whether a boxed float is
                // written as a number, and rounding avoids float noise such as 1.2000000476837158.
                ["speed_ratio"] = Math.Round((double)speedRatio, 3)
            },
            ["request"] = new Dictionary<string, object>
            {
                ["reqid"] = reqId,
                ["text"] = text,
                ["operation"] = operation
            }
        };
        return JsonMapper.ToJson(requestData);
    }

    // Parses the HTTP response body and extracts the decoded audio bytes.
    // Returns false with a message when the body is malformed, reports a non-success code,
    // or carries no audio.
    private static bool TryDecodeHttpResponse(string responseText, out byte[] audioBytes, out string error)
    {
        audioBytes = null;
        error = null;
        try
        {
            TTSResponse response = JsonMapper.ToObject<TTSResponse>(responseText);
            if (response == null)
            {
                error = "响应为空";
                return false;
            }
            if (response.code != SuccessCode)
            {
                error = string.IsNullOrEmpty(response.message) ? $"code {response.code}" : response.message;
                return false;
            }
            if (string.IsNullOrEmpty(response.data))
            {
                error = "响应中没有音频数据";
                return false;
            }
            audioBytes = Convert.FromBase64String(response.data);
            return true;
        }
        catch (Exception ex)
        {
            // Malformed JSON or invalid base64 must not kill the calling coroutine.
            error = $"响应解析失败: {ex.Message}";
            return false;
        }
    }

    // Builds the "full client request" frame: 4-byte header, 4-byte big-endian payload size,
    // then the UTF-8 JSON payload.
    private static byte[] BuildWebSocketRequest(string jsonData)
    {
        byte[] payload = Encoding.UTF8.GetBytes(jsonData);
        byte[] message = new byte[8 + payload.Length];
        message[0] = (byte)((1 << 4) | 1); // protocol version 1, header size 1 (x 4 bytes)
        message[1] = (byte)(1 << 4);       // message type 1 (full client request), no flags
        message[2] = (byte)(1 << 4);       // JSON serialization, no compression
        message[3] = 0;                    // reserved
        message[4] = (byte)(payload.Length >> 24);
        message[5] = (byte)(payload.Length >> 16);
        message[6] = (byte)(payload.Length >> 8);
        message[7] = (byte)payload.Length;
        Buffer.BlockCopy(payload, 0, message, 8, payload.Length);
        return message;
    }

    // Classifies one complete server frame.
    // For audio frames, payload is the audio slice of the frame (possibly empty).
    // For error frames, error holds the decoded message.
    private static ServerFrameKind ParseServerFrame(byte[] frame, out ArraySegment<byte> payload, out string error)
    {
        payload = new ArraySegment<byte>(frame, 0, 0);
        error = null;
        if (frame.Length < 4)
        {
            return ServerFrameKind.Ignored;
        }

        int headerSize = (frame[0] & 0x0F) * 4;
        if (headerSize < 4 || frame.Length < headerSize)
        {
            return ServerFrameKind.Ignored;
        }

        int messageType = (frame[1] >> 4) & 0x0F;
        int flags = frame[1] & 0x0F;
        // Both handled message types put two 4-byte fields between header and body.
        int bodyOffset = headerSize + 8;

        if (messageType == MessageTypeAudioOnly)
        {
            if (flags == 0)
            {
                // Acknowledgement without a sequence number: carries no audio.
                return ServerFrameKind.Ignored;
            }

            // Flags 2 and 3 mark the last frame of the stream.
            bool isLast = flags == 0x02 || flags == 0x03;
            if (frame.Length >= bodyOffset)
            {
                // Body is: sequence number (int32 BE), payload size (uint32 BE), audio bytes.
                // A negative sequence number also marks the last frame.
                isLast |= ReadInt32BigEndian(frame, headerSize) < 0;
                payload = new ArraySegment<byte>(frame, bodyOffset, frame.Length - bodyOffset);
            }
            return isLast ? ServerFrameKind.FinalAudio : ServerFrameKind.Audio;
        }

        if (messageType == MessageTypeError)
        {
            // Body is: error code (uint32 BE), message size (uint32 BE), message text.
            // Fall back to decoding everything after the header if the frame is too short.
            int textOffset = frame.Length >= bodyOffset ? bodyOffset : headerSize;
            bool gzipped = (frame[2] & 0x0F) == CompressionGzip;
            error = DecodeText(frame, textOffset, frame.Length - textOffset, gzipped);
            return ServerFrameKind.Error;
        }

        return ServerFrameKind.Ignored;
    }

    // Reads a big-endian signed 32-bit integer; the caller guarantees four bytes are available.
    private static int ReadInt32BigEndian(byte[] data, int offset)
    {
        return (data[offset] << 24) | (data[offset + 1] << 16) | (data[offset + 2] << 8) | data[offset + 3];
    }

    // Decodes UTF-8 text, inflating it first when the frame header says it is gzip-compressed.
    // Falls back to decoding the raw bytes if inflation fails.
    private static string DecodeText(byte[] data, int offset, int count, bool gzipped)
    {
        if (gzipped && count > 0)
        {
            try
            {
                using (var input = new MemoryStream(data, offset, count))
                using (var gzip = new System.IO.Compression.GZipStream(input, System.IO.Compression.CompressionMode.Decompress))
                using (var output = new MemoryStream())
                {
                    gzip.CopyTo(output);
                    return Encoding.UTF8.GetString(output.ToArray());
                }
            }
            catch (Exception ex)
            {
                Debug.LogWarning($"gzip解压失败: {ex.Message}");
            }
        }
        return Encoding.UTF8.GetString(data, offset, count);
    }

    // Writes the audio bytes to a per-request file in the temporary cache so Unity's audio
    // loader can decode it. Returns false with a message instead of throwing.
    private bool TryWriteTempAudioFile(string reqId, byte[] audioBytes, out string path, out string error)
    {
        path = Path.Combine(Application.temporaryCachePath, $"tts_{reqId}.{encoding}");
        error = null;
        try
        {
            File.WriteAllBytes(path, audioBytes);
            return true;
        }
        catch (Exception ex)
        {
            error = ex.Message;
            return false;
        }
    }

    // Best-effort cleanup; a leftover temp file must never turn a successful request into a failure.
    private static void TryDeleteFile(string path)
    {
        try
        {
            if (!string.IsNullOrEmpty(path) && File.Exists(path))
            {
                File.Delete(path);
            }
        }
        catch (Exception ex)
        {
            Debug.LogWarning($"临时文件删除失败: {ex.Message}");
        }
    }

    // Publishes the outcome of a finished audio-load request through the events and the callback.
    private void DeliverClip(UnityWebRequest audioRequest, Action<AudioClip> callback)
    {
        if (audioRequest.result != UnityWebRequest.Result.Success)
        {
            Debug.LogError($"音频加载错误: {audioRequest.error}");
            ReportFailure(audioRequest.error, callback);
            return;
        }

        AudioClip clip = DownloadHandlerAudioClip.GetContent(audioRequest);
        OnAudioReceived?.Invoke(clip);
        callback?.Invoke(clip);
    }

    // Completes the close handshake when the socket is in a state that allows it.
    // Calling CloseAsync on a socket that never connected or was aborted throws, so those
    // states are skipped and any close failure is only logged.
    private static async Task CloseQuietlyAsync(ClientWebSocket client)
    {
        if (client.State != WebSocketState.Open && client.State != WebSocketState.CloseReceived)
        {
            return;
        }
        try
        {
            await client.CloseAsync(WebSocketCloseStatus.NormalClosure, "", CancellationToken.None);
        }
        catch (Exception ex)
        {
            Debug.LogWarning($"WebSocket关闭失败: {ex.Message}");
        }
    }

    // Resolves the header name/value for the configured auth type.
    // Returns false for an unknown auth type.
    private bool TryGetAuthenticationHeader(out string name, out string value)
    {
        switch (authType)
        {
            case AuthType.BearerToken:
                // The semicolon after "Bearer" is the format this service documents; keep it.
                name = "Authorization";
                value = $"Bearer; {accessToken}";
                return true;
            case AuthType.ApiKey:
                name = "X-API-Key";
                value = secretKey;
                return true;
            case AuthType.BasicAuth:
                name = "Authorization";
                value = $"Basic {Convert.ToBase64String(Encoding.ASCII.GetBytes($"{appId}:{secretKey}"))}";
                return true;
            default:
                name = null;
                value = null;
                return false;
        }
    }

    // Applies the configured credentials to an HTTP request. The header value is a secret and
    // is deliberately not logged.
    private void SetAuthenticationHeader(UnityWebRequest request)
    {
        if (TryGetAuthenticationHeader(out string name, out string value))
        {
            request.SetRequestHeader(name, value);
        }
    }

    // Applies the configured credentials to the WebSocket handshake; must run before ConnectAsync.
    private void SetWebSocketAuthentication(ClientWebSocket client)
    {
        if (TryGetAuthenticationHeader(out string name, out string value))
        {
            client.Options.SetRequestHeader(name, value);
        }
    }

    // Maps the configured encoding name to Unity's AudioType; unrecognised names give UNKNOWN.
    private AudioType GetAudioType(string encoding)
    {
        // Invariant casing: the mapping must not depend on the device's culture.
        switch (encoding.ToLowerInvariant())
        {
            case "mp3": return AudioType.MPEG;
            case "wav": return AudioType.WAV;
            case "ogg": return AudioType.OGGVORBIS;
            default: return AudioType.UNKNOWN;
        }
    }

    // Verifies that every inspector setting needed for a request is present, logging the
    // first missing one.
    private bool ValidateParameters()
    {
        if (string.IsNullOrEmpty(appId))
        {
            Debug.LogError("appId不能为空");
            return false;
        }
        if (string.IsNullOrEmpty(accessToken) && authType == AuthType.BearerToken)
        {
            Debug.LogError("accessToken不能为空");
            return false;
        }
        if (string.IsNullOrEmpty(secretKey) && (authType == AuthType.ApiKey || authType == AuthType.BasicAuth))
        {
            Debug.LogError("secretKey不能为空");
            return false;
        }
        if (string.IsNullOrEmpty(voiceType))
        {
            Debug.LogError("voiceType不能为空");
            return false;
        }
        if (string.IsNullOrEmpty(encoding))
        {
            Debug.LogError("encoding不能为空");
            return false;
        }
        if (string.IsNullOrEmpty(httpApiUrl))
        {
            Debug.LogError("httpApiUrl不能为空");
            return false;
        }
        if (string.IsNullOrEmpty(websocketApiUrl))
        {
            Debug.LogError("websocketApiUrl不能为空");
            return false;
        }
        return true;
    }

    // Shape of the JSON body returned by the HTTP endpoint; "data" holds base64 audio.
    [Serializable]
    private class TTSResponse
    {
        public string reqid;
        public int code;
        public string message;
        public int sequence;
        public string data;
        public Dictionary<string, object> addition;
    }
}
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;
/// <summary>
/// Sample UI controller: sends the text typed into <see cref="textInput"/> to
/// <see cref="TTSManager"/> over HTTP or WebSocket, plays the returned clip on
/// <see cref="audioSource"/> and shows progress and errors in <see cref="statusText"/>.
/// </summary>
public class TTSExample : MonoBehaviour
{
    public InputField textInput;
    public Button httpButton;
    public Button websocketButton;
    public AudioSource audioSource;
    public Text statusText;

    // Cached at Start so OnDestroy can unsubscribe without going through TTSManager.Instance,
    // whose getter creates a brand-new GameObject when no manager exists (for example while
    // the scene or the application is shutting down).
    private TTSManager manager;

    // Wires the buttons and subscribes to the manager's result events.
    private void Start()
    {
        manager = TTSManager.Instance;
        httpButton.onClick.AddListener(OnHTTPButtonClick);
        websocketButton.onClick.AddListener(OnWebSocketButtonClick);
        manager.OnAudioReceived += OnAudioReceived;
        manager.OnError += OnError;
    }

    // Undoes everything Start registered; each target may already have been destroyed.
    private void OnDestroy()
    {
        if (httpButton != null)
        {
            httpButton.onClick.RemoveListener(OnHTTPButtonClick);
        }
        if (websocketButton != null)
        {
            websocketButton.onClick.RemoveListener(OnWebSocketButtonClick);
        }
        if (manager != null)
        {
            manager.OnAudioReceived -= OnAudioReceived;
            manager.OnError -= OnError;
        }
        manager = null;
    }

    // Starts an HTTP synthesis request for the current input text.
    private void OnHTTPButtonClick()
    {
        if (string.IsNullOrEmpty(textInput.text))
        {
            statusText.text = "请输入文本";
            return;
        }
        statusText.text = "正在请求音频...";
        StartCoroutine(TTSManager.Instance.TextToSpeechHTTP(textInput.text));
    }

    // Starts a WebSocket synthesis request for the current input text.
    // async void is acceptable here only because this is a UI event handler; the try/catch
    // keeps an unexpected exception from escaping where nothing could observe it.
    private async void OnWebSocketButtonClick()
    {
        if (string.IsNullOrEmpty(textInput.text))
        {
            statusText.text = "请输入文本";
            return;
        }
        statusText.text = "正在请求音频...";
        try
        {
            await TTSManager.Instance.TextToSpeechWebSocket(textInput.text);
        }
        catch (System.Exception ex)
        {
            // This component may have been destroyed while the request was in flight.
            if (this != null)
            {
                OnError(ex.Message);
            }
        }
    }

    // Plays a successfully synthesized clip.
    private void OnAudioReceived(AudioClip clip)
    {
        if (clip != null)
        {
            audioSource.clip = clip;
            audioSource.Play();
            statusText.text = "音频播放中...";
        }
    }

    // Shows and logs a failure reported by the manager.
    private void OnError(string errorMessage)
    {
        statusText.text = $"错误: {errorMessage}";
        Debug.LogError(errorMessage);
    }
}