功能概述
允许用户选择以麦克风(microphone)或系统声音(system_sound)作为语音输入源。
对输入的语音进行识别,得到对应的文本内容。
将识别出的文本翻译成指定的目标语言。
把翻译后的文本转换为语音并播放出来。
import os
import subprocess
import tempfile
import time

import pyaudio
import pyshark
import speech_recognition as sr
from googletrans import Translator
from gtts import gTTS
# Shared speech recognizer; recognize_speech() uses it both to listen on the
# microphone and to send audio to the Google recognition endpoint.
r = sr.Recognizer()
# Shared translator; translate_text() uses it for every translation request.
translator = Translator()
def _record_from_default_input(language, record_seconds):
    """Record fixed-length PCM audio through PyAudio and wrap it as AudioData.

    No input device index is passed to ``open()``, so PyAudio chooses the
    input device itself.
    NOTE(review): on most machines that is a microphone, not a loopback of
    what the system is playing -- confirm a loopback/monitor device is the
    default if true "system sound" capture is required.
    """
    chunk = 1024
    sample_format = pyaudio.paInt16
    channels = 1
    rate = 44100

    audio_interface = pyaudio.PyAudio()
    try:
        stream = audio_interface.open(
            format=sample_format,
            channels=channels,
            rate=rate,
            input=True,
            frames_per_buffer=chunk,
        )
        try:
            print(f"正在获取系统声音({language}),请在系统中播放相关音频...")
            chunk_count = int(rate / chunk * record_seconds)
            frames = [stream.read(chunk) for _ in range(chunk_count)]
        finally:
            # Release the stream even when a read fails part-way through.
            stream.stop_stream()
            stream.close()
    finally:
        audio_interface.terminate()

    # AudioData expects the sample width in BYTES (2 for 16-bit PCM), not the
    # PyAudio format constant, so convert it explicitly.
    sample_width = pyaudio.get_sample_size(sample_format)
    return sr.AudioData(b"".join(frames), rate, sample_width)


def recognize_speech(language, input_type="microphone", record_seconds=5):
    """Capture audio from the chosen source and return the recognized text.

    Args:
        language: Language tag passed to the Google recognizer, e.g. "en-US".
        input_type: "microphone" to listen until the recognizer detects the
            end of a phrase, or "system_sound" to record a fixed-length clip
            through PyAudio.
        record_seconds: Clip length in seconds for the "system_sound" path;
            ignored for "microphone".

    Returns:
        The recognized text, or None when the speech could not be understood
        or the recognition request failed (a message is printed in both
        cases).

    Raises:
        ValueError: If ``input_type`` is not one of the two supported values.
    """
    if input_type == "microphone":
        with sr.Microphone() as source:
            print(f"请说话({language}):")
            audio = r.listen(source)
    elif input_type == "system_sound":
        audio = _record_from_default_input(language, record_seconds)
    else:
        raise ValueError("无效的输入类型,可选'microphone'或'system_sound'")

    try:
        text = r.recognize_google(audio, language=language)
    except sr.UnknownValueError:
        # Raised when the service returns no usable transcription.
        print("无法识别语音")
        return None
    except sr.RequestError as e:
        # Raised when the recognition service is unreachable or rejects the call.
        print(f"请求错误:{e}")
        return None

    print(f"识别结果: {text}")
    return text
def translate_text(text, target_language):
    """Translate ``text`` into ``target_language`` and return the result.

    The translated string is also printed. The request goes through the
    module-level ``translator`` object.
    """
    translated = translator.translate(text, dest=target_language).text
    print(f"翻译结果: {translated}")
    return translated
def speak_text(text, language):
    """Synthesize ``text`` with gTTS and play it through ``mpg123``.

    Args:
        text: Text to speak. Empty or whitespace-only text is skipped, since
            there is nothing to synthesize.
        language: gTTS language code, e.g. "zh-CN".

    The audio is written to a uniquely named temporary file instead of a
    fixed ``output.mp3`` in the working directory, so an existing file of
    that name is never overwritten. The temporary file is removed even when
    synthesis or playback fails.
    """
    if not text or not text.strip():
        return

    fd, mp3_path = tempfile.mkstemp(suffix=".mp3")
    # gTTS reopens the path itself; close our handle so the file is not held
    # open twice.
    os.close(fd)
    try:
        gTTS(text=text, lang=language).save(mp3_path)
        try:
            # Argument list instead of a shell string: the path is passed
            # verbatim and no shell is involved.
            subprocess.run(["mpg123", mp3_path], check=False)
        except FileNotFoundError:
            # Keep playback best-effort, as with the previous os.system call.
            print("未找到 mpg123,无法播放语音")
    finally:
        os.remove(mp3_path)
if __name__ == "__main__":
    # Source language for recognition (US English); change as needed.
    source_language = "en-US"
    # Target language for translation and speech (Mainland Chinese); change as needed.
    target_language = "zh-CN"
    # Normalize the typed choice so stray whitespace or capitals do not make
    # recognize_speech() reject an otherwise valid option.
    input_option = input("请选择输入方式(microphone/system_sound):").strip().lower()
    try:
        while True:
            recognized_text = recognize_speech(source_language, input_type=input_option)
            if recognized_text:
                translated_text = translate_text(recognized_text, target_language)
                speak_text(translated_text, target_language)
            # Pause 2 seconds between rounds to avoid recognizing too frequently.
            time.sleep(2)
    except KeyboardInterrupt:
        # Ctrl+C is the only way out of the loop; exit without a traceback.
        print("\n已退出")