Vosk 介绍以及安装,参考地址:https://blog.csdn.net/qq_35385687/article/details/119209189?spm=1001.2014.3001.5501
Vosk 实时语音识别,参考地址:https://blog.csdn.net/qq_35385687/article/details/119357825?spm=1001.2014.3001.5501

命令行模式

#!/usr/bin/env python3

import os
import wave
from vosk import Model, KaldiRecognizer, SetLogLevel

# Command-line recognizer: feeds ./test.wav to Vosk in chunks and prints each
# result line prefixed with "r" (Result), "p" (PartialResult) or
# "f" (FinalResult).
SetLogLevel(0)

# The model is looked up as a directory named "model" relative to the current
# working directory.
if not os.path.exists("model"):
    print(
        "Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
    # SystemExit instead of exit(): exit() is a helper injected by the `site`
    # module for interactive use and is not guaranteed to exist in programs.
    raise SystemExit(1)

# The input path is hard-coded; substitute sys.argv[1] here to take it from
# the command line instead.
# The context manager guarantees the wave file is closed on every exit path,
# including the early SystemExit below.
with wave.open("./test.wav", "rb") as wf:
    # Only mono, 16-bit (2 bytes per sample), uncompressed PCM is accepted.
    if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
        print("Audio file must be WAV format mono PCM.")
        raise SystemExit(1)

    model = Model("model")
    rec = KaldiRecognizer(model, wf.getframerate())
    rec.SetWords(False)

    # Stream the audio 4000 frames at a time until the file is exhausted.
    while True:
        data = wf.readframes(4000)
        if not data:
            break
        if rec.AcceptWaveform(data):
            print("r" + rec.Result())
        else:
            print("p" + rec.PartialResult())

print("f" + rec.FinalResult())

文件上传方式

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os
import wave
from flask import Flask, request
from vosk import Model, KaldiRecognizer, SetLogLevel

# Flask application object that the /upload route is registered on.
app = Flask(__name__)
# Directory (relative to the working directory) where uploads are stored.
download_floder = './upload/'
SetLogLevel(0)

# Refuse to start without a model directory named "model" in the working
# directory; every recognition request needs it.
if not os.path.exists("model"):
    print("请从'https://alphacephei.com/vosk/models'下载模型并解压到'model'文件夹")
    # SystemExit instead of exit(): exit() is a helper injected by the `site`
    # module for interactive use and is not guaranteed to exist in programs.
    raise SystemExit(1)


# File-type check
def allow_file(filename):
    """Return True when the final extension of *filename* is 'wav' or 'pcm'.

    Only the text after the LAST dot counts as the extension, so
    'my.recording.wav' is accepted and 'clip.wav.exe' is rejected. A name
    with no dot at all is rejected. The comparison is case-sensitive.

    Args:
        filename: The file name to inspect.

    Returns:
        bool: True if the name has an accepted extension, otherwise False.
    """
    allow_list = ('wav', 'pcm')
    # rpartition yields an empty separator when the name contains no dot.
    _, dot, suffix = filename.rpartition('.')
    return bool(dot) and suffix in allow_list


# Vosk model shared by all requests; created on first use so that it is not
# reloaded from disk for every upload.
_model = None


def _get_model():
    """Return the shared Vosk model, loading it from 'model' on first use."""
    global _model
    if _model is None:
        _model = Model("model")
    return _model


def _pcm_to_wav(pcm_path, wav_path):
    """Copy the raw bytes of pcm_path into a new WAV file at wav_path.

    The WAV header is written as 1 channel, 2 bytes per sample, 16000 Hz,
    uncompressed; the raw data is assumed to already be in that format.
    """
    with open(pcm_path, 'rb') as pcmfile:
        pcmdata = pcmfile.read()
    with wave.open(wav_path, 'wb') as wavfile:
        wavfile.setparams((1, 2, 16000, 0, 'NONE', 'NONE'))
        wavfile.writeframes(pcmdata)


@app.route('/upload', methods=['POST'])
def upload():
    """Save the uploaded 'file' form field, run Vosk on it, return the result.

    A .pcm upload is first wrapped in a WAV container next to the saved file.
    Intermediate Result()/PartialResult() strings are only printed; the
    response body is the recognizer's FinalResult() string.

    Returns:
        The FinalResult() string on success, or an (error message, 400) pair
        when the upload is missing, has an unsupported extension, or is not
        mono 16-bit uncompressed WAV audio.
    """
    f = request.files.get('file')
    # The file name comes from the client: keep only its last path component
    # so the upload can never be written outside the upload folder.
    filename = os.path.basename(f.filename) if f and f.filename else ''
    if not filename or not allow_file(filename):
        # Answer with an explicit 400 rather than falling off the end of the
        # view, which Flask reports as a server error.
        return "Unsupported or missing file; expected a .wav or .pcm upload.", 400

    filePath = os.path.join(download_floder, filename)
    print(filePath)
    f.save(filePath)

    stem, ext = os.path.splitext(filePath)
    if ext == '.pcm':
        # Read from the saved .pcm and write to a separate .wav path; only the
        # extension is swapped, never other occurrences of "pcm" in the name.
        wav_path = stem + '.wav'
        _pcm_to_wav(filePath, wav_path)
        filePath = wav_path

    print(filePath)
    try:
        wf = wave.open(filePath, "rb")
    except (wave.Error, EOFError):
        # Not a readable WAV file: a client error, not a server crash.
        return "Audio file must be WAV format mono PCM.", 400

    # The context manager closes the wave file on every return path.
    with wf:
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
            print("Audio file must be WAV format mono PCM.")
            # Reject this request only; calling exit() here would raise
            # SystemExit inside the server instead of answering the client.
            return "Audio file must be WAV format mono PCM.", 400

        rec = KaldiRecognizer(_get_model(), wf.getframerate())
        rec.SetWords(False)

        # Stream the audio 4000 frames at a time until the file is exhausted.
        while True:
            data = wf.readframes(4000)
            if not data:
                break
            if rec.AcceptWaveform(data):
                print(rec.Result())
            else:
                print(rec.PartialResult())

    return rec.FinalResult()


if __name__ == '__main__':
    # exist_ok=True creates the upload folder if needed without the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(download_floder, exist_ok=True)
    # The server listens on all interfaces (0.0.0.0). Flask's debug mode
    # enables an interactive debugger that can execute arbitrary code, so it
    # is off by default here and only enabled when FLASK_DEBUG=1 is set.
    app.run(debug=os.environ.get('FLASK_DEBUG') == '1', host='0.0.0.0')

Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐