前言

语音识别技术就是让智能设备听懂人类的语音。它是一门涉及数字信号处理、人工智能、语言学、数理统计学、声学、情感学及心理学等多学科交叉的科学。

1.引入库

from aip import AipSpeech
import pyaudio # 自己下载轮子
import wave

2.语音录制(录音)

import pyaudio # 自己下载轮子
import wave
in_path = "D:/aihei/input.wav" # path where the recording is stored

def get_audio(out_path="D:/aihei/input.wav", record_seconds=6, rate=16000):
    """Record mono 16-bit audio from the default input device into a WAV file.

    Args:
        out_path: Destination WAV file. Defaults to the path this script
            reads back for recognition.
        record_seconds: Length of the recording in seconds.
        rate: Sampling rate in Hz. Defaults to 16000 so the file agrees with
            the rate declared in the recognition request of this script.

    The function returns nothing; its result is the file written to
    ``out_path``.
    """
    chunk = 1024                      # frames read per buffer
    sample_format = pyaudio.paInt16   # 16-bit samples
    channels = 1                      # mono

    audio = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = audio.get_sample_size(sample_format)
        stream = audio.open(format=sample_format,
                            channels=channels,
                            rate=rate,
                            input=True,
                            frames_per_buffer=chunk)
        try:
            print("*" * 6,
                  "开始录音:请在{}秒内输入语音".format(record_seconds),
                  "*" * 6)
            buffer_count = int(rate / chunk * record_seconds)
            frames = [stream.read(chunk) for _ in range(buffer_count)]
            print("*" * 6, "录音结束\n")
        finally:
            # Release the input stream even if reading failed part-way.
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    # The context manager closes the file (and finalises the WAV header)
    # even if writing raises.
    with wave.open(out_path, 'wb') as wav_file:
        wav_file.setnchannels(channels)
        wav_file.setsampwidth(sample_width)
        wav_file.setframerate(rate)
        wav_file.writeframes(b''.join(frames))

3.语音识别

from aip import AipSpeech
def resolution(audio_path=r'D:/aihei/input.wav'):
    """Send a recorded WAV file to Baidu speech recognition and return the text.

    Args:
        audio_path: WAV file to recognise. Defaults to the path the recording
            step of this script writes to.

    Returns:
        The recognised text: the entries of the response's ``result`` list
        joined into one string. The text is also printed.

    Raises:
        RuntimeError: If the response carries no ``result`` field; the
            message includes the ``err_no`` / ``err_msg`` fields of the
            response.
    """
    import os

    # NOTE(review): these credentials are committed in source. The environment
    # variables below take precedence when set; the literals are kept only so
    # existing setups keep working. Consider rotating the keys and removing
    # the fallbacks.
    app_id = os.environ.get("BAIDU_APP_ID", "25907295")
    api_key = os.environ.get("BAIDU_API_KEY", "zpGtZmV6YwgTjmIZbhalvocZ")
    secret_key = os.environ.get("BAIDU_SECRET_KEY",
                                "YAlY3XdKspLzBxvQQKFDWhdoZlM674gi")
    client = AipSpeech(app_id, api_key, secret_key)

    with open(audio_path, 'rb') as audio_file:
        audio_bytes = audio_file.read()

    # The audio is declared as 16 kHz WAV; dev_pid selects the recognition
    # model (1537 is presumably Mandarin -- confirm against the Baidu docs).
    response = client.asr(audio_bytes, 'wav', 16000, {'dev_pid': 1537})

    # Error responses carry no 'result' key; report the service's own error
    # fields instead of failing with a bare KeyError.
    if 'result' not in response:
        raise RuntimeError(
            "Baidu ASR request failed: err_no={}, err_msg={}".format(
                response.get('err_no'), response.get('err_msg')))

    text = "".join(response['result'])
    print(text)
    return text

总结:


录音通过调用 pyaudio 库(需自行下载 whl 安装包)来实现,语音识别通过调用百度 AIP 库来实现。

Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐