# Python 同声传译工具(开发中)

发布于:2024-12-06 ⋅ 阅读:(99) ⋅ 点赞:(0)

功能概述
允许用户选择麦克风(microphone)或系统声音(system_sound)作为语音输入源。
对输入的语音进行识别,得到对应的文本内容。
将识别出的文本翻译成指定的目标语言。
把翻译后的文本转换为语音并播放出来。

import speech_recognition as sr
from gtts import gTTS
from googletrans import Translator
import os
import time
import pyaudio
import pyshark


# Module-level speech recognizer shared by recognize_speech().
r = sr.Recognizer()
# Module-level translator shared by translate_text().
translator = Translator()


def _record_default_input(seconds, rate=44100, chunk=1024):
    """Record ``seconds`` of 16-bit mono audio and wrap it as ``sr.AudioData``.

    The stream is opened with ``input=True`` and no device index, i.e. on
    PyAudio's default input device.
    """
    sample_format = pyaudio.paInt16
    # AudioData wants the sample width in BYTES (2 for 16-bit audio), not the
    # PortAudio format constant itself.
    sample_width = pyaudio.get_sample_size(sample_format)

    audio_api = pyaudio.PyAudio()
    try:
        stream = audio_api.open(format=sample_format,
                                channels=1,
                                rate=rate,
                                input=True,
                                frames_per_buffer=chunk)
        try:
            chunk_count = int(rate / chunk * seconds)
            frames = [stream.read(chunk) for _ in range(chunk_count)]
        finally:
            # Release the device even if a read fails part-way through.
            stream.stop_stream()
            stream.close()
    finally:
        audio_api.terminate()

    return sr.AudioData(b"".join(frames), rate, sample_width)


def recognize_speech(language, input_type="microphone", record_seconds=5):
    """Capture audio and return the text recognized by Google's web API.

    Args:
        language: Language tag passed to ``recognize_google`` (e.g. "en-US").
        input_type: "microphone" listens until the recognizer decides the
            phrase has ended; "system_sound" records a fixed-length clip.
            NOTE(review): the "system_sound" path records from the default
            input device, so capturing actual system playback presumably
            requires a loopback/monitor device to be the default -- confirm.
        record_seconds: Clip length in seconds for "system_sound".

    Returns:
        The recognized text, or ``None`` if the speech could not be
        understood or the recognition request failed.

    Raises:
        ValueError: If ``input_type`` is not one of the two supported values.
    """
    if input_type == "microphone":
        with sr.Microphone() as source:
            print(f"请说话({language}):")
            audio = r.listen(source)
    elif input_type == "system_sound":
        print(f"正在获取系统声音({language}),请在系统中播放相关音频...")
        audio = _record_default_input(record_seconds)
    else:
        raise ValueError("无效的输入类型,可选'microphone'或'system_sound'")

    try:
        text = r.recognize_google(audio, language=language)
    except sr.UnknownValueError:
        # The library raises UnknownValueError for unintelligible audio.
        print("无法识别语音")
        return None
    except sr.RequestError as e:
        print(f"请求错误:{e}")
        return None

    print(f"识别结果: {text}")
    return text


def translate_text(text, target_language):
    """Translate ``text`` into ``target_language``, print it, and return it.

    Uses the module-level ``translator``; the translated string is taken from
    the ``.text`` attribute of the result.
    """
    translated = translator.translate(text, dest=target_language).text
    print(f"翻译结果: {translated}")
    return translated


def speak_text(text, language):
    """Synthesize ``text`` with gTTS and play it with the ``mpg123`` player.

    Args:
        text: The text to speak.
        language: Language code passed to gTTS as ``lang``.

    The audio is written to a private temporary file, so an unrelated
    "output.mp3" in the working directory is never overwritten or deleted,
    and the file is removed even if synthesis or playback fails.
    """
    import subprocess
    import tempfile

    fd, mp3_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # gTTS reopens the path itself; only the name is needed.
    try:
        gTTS(text=text, lang=language).save(mp3_path)
        try:
            # Argument list instead of a shell string: no quoting issues.
            subprocess.run(["mpg123", mp3_path], check=False)
        except FileNotFoundError:
            # Keep playback best-effort when the player is not installed.
            print("未找到 mpg123,无法播放语音")
    finally:
        os.remove(mp3_path)


if __name__ == "__main__":
    # Source language for recognition (US English); change as needed.
    source_language = "en-US"
    # Target language for translation and speech (Simplified Chinese).
    target_language = "zh-CN"

    # Normalize the answer so stray whitespace or capitals do not get the
    # choice rejected by recognize_speech().
    input_option = input("请选择输入方式(microphone/system_sound):").strip().lower()

    try:
        while True:
            recognized_text = recognize_speech(source_language, input_type=input_option)
            if recognized_text:
                translated_text = translate_text(recognized_text, target_language)
                speak_text(translated_text, target_language)
            time.sleep(2)  # Pause 2 seconds to avoid recognizing too frequently.
    except KeyboardInterrupt:
        # Ctrl+C is the only way out of the loop; exit without a traceback.
        print("\n已退出")


网站公告

今日签到

点亮在社区的每一天
去签到