JAVA开发离线语音识别

200 阅读 0 评论 132 点赞

我是靠谱客的博主羞涩钥匙，这篇文章主要介绍JAVA开发离线语音识别，现在分享给大家，希望可以做个参考。

可以离线识别，但是暂时只有一个小的语音库，识别准确率特别低。

如果谁有训练语音库的方法希望可以分享一下。谢谢！

springboot框架搭的一个小demo。

原文中还有前端页面的 html 和 js，包含录音、播放、翻译等小功能，详情见文末的原文地址。

package com.example.gadgets.yysb;

import com.alibaba.fastjson.JSONObject;
import com.sun.media.sound.WaveFileReader;
import com.sun.media.sound.WaveFileWriter;
import org.springframework.util.Assert;
import org.springframework.util.StringUtils;
import org.vosk.LibVosk;
import org.vosk.LogLevel;
import org.vosk.Model;
import org.vosk.Recognizer;

import javax.sound.sampled.*;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Offline speech-to-text helper built on the Vosk recognizer.
 *
 * <p>The entry point is {@link #getWord(String)}: it rewrites the given audio file in place
 * at 16 kHz, prints a few fields of the WAV header for diagnostics, and then feeds the audio
 * to a Vosk {@code Recognizer} to obtain the recognized text.
 */
public class VoiceUtil {

    // Model directory; it has to be downloaded from https://alphacephei.com/vosk/models.
    // This points at the small Chinese model (vosk-model-small-cn-0.22).
    // According to the original author's tests the small model is only about 30% accurate,
    // and possibly lower when the speech is unclear.
    private static final String VOSKMODELPATH = "D:/yuyinshibie/vosk-model-small-cn-0.22";

    /** Sample rate (Hz) the audio is converted to before recognition. */
    private static final int TARGET_SAMPLE_RATE = 16000;

    /**
     * Recognizes the speech contained in an audio file.
     *
     * <p>Side effect: the file at {@code filePath} is overwritten with a 16 kHz WAV version
     * of itself before recognition.
     *
     * @param filePath path of the audio file to recognize
     * @return the recognized text with all spaces removed, or an empty string when the
     *         recognizer produced no text
     * @throws IOException if the file cannot be read or rewritten
     * @throws UnsupportedAudioFileException if the file is not a supported audio format
     */
    public static String getWord(String filePath) throws IOException, UnsupportedAudioFileException {
        Assert.isTrue(StringUtils.hasLength(VOSKMODELPATH), "无效的VOS模块！");
        byte[] bytes = Files.readAllBytes(Paths.get(filePath));
        // Convert to 16 kHz; the whole file is already in memory, so overwriting it is safe.
        reSamplingAndSave(bytes, filePath);

        short track;
        // The offsets below assume the canonical RIFF/WAVE layout with the "fmt " chunk
        // directly after the RIFF header - TODO confirm for files with extra chunks.
        try (RandomAccessFile rdf = new RandomAccessFile(filePath, "r")) {
            System.out.println("声音尺寸:" + toInt(read(rdf, 4, 4)));
            System.out.println("音频格式:" + toShort(read(rdf, 20, 2)));
            track = toShort(read(rdf, 22, 2));
            System.out.println("1 单声道 2 双声道: " + track);
            System.out.println("采样率、音频采样级别 16000 = 16KHz: " + toInt(read(rdf, 24, 4)));
            // Byte rate is the 4-byte field at offset 28 (offset 22 is the channel count).
            System.out.println("每秒波形的数据量：" + toInt(read(rdf, 28, 4)));
            System.out.println("采样帧的大小：" + toShort(read(rdf, 32, 2)));
            System.out.println("采样位数：" + toShort(read(rdf, 34, 2)));
        }
        LibVosk.setLogLevel(LogLevel.WARNINGS);

        try (Model model = new Model(VOSKMODELPATH);
             InputStream ais = AudioSystem.getAudioInputStream(
                     new BufferedInputStream(new FileInputStream(filePath)));
             // The recognizer rate is the audio sample rate multiplied by the channel count.
             // NOTE(review): this treats interleaved stereo as faster mono audio; down-mixing
             // to mono would presumably be cleaner - confirm before changing.
             Recognizer recognizer = new Recognizer(model, TARGET_SAMPLE_RATE * track)) {

            byte[] buffer = new byte[4096];
            int nbytes;
            while ((nbytes = ais.read(buffer)) >= 0) {
                // Intermediate/partial results are not needed; only the final result is used.
                recognizer.acceptWaveForm(buffer, nbytes);
            }

            String result = recognizer.getFinalResult();
            System.out.println("识别结果：" + result);
            if (!StringUtils.hasLength(result)) {
                return "";
            }
            String text = JSONObject.parseObject(result).getString("text");
            // Guard against a result object without a "text" entry.
            return text == null ? "" : text.replace(" ", "");
        }
    }

    /**
     * Decodes a little-endian 32-bit integer from the first four bytes of {@code b}.
     *
     * @param b array holding at least four bytes, least significant byte first
     * @return the decoded value
     */
    public static int toInt(byte[] b) {
        return ((b[3] & 0xff) << 24) | ((b[2] & 0xff) << 16) | ((b[1] & 0xff) << 8) | (b[0] & 0xff);
    }

    /**
     * Decodes a little-endian 16-bit integer from the first two bytes of {@code b}.
     *
     * @param b array holding at least two bytes, least significant byte first
     * @return the decoded value
     */
    public static short toShort(byte[] b) {
        // Both bytes are masked so a byte >= 0x80 is not sign-extended into the other byte.
        return (short) (((b[1] & 0xff) << 8) | (b[0] & 0xff));
    }

    /**
     * Reads {@code length} bytes starting at absolute offset {@code pos}.
     *
     * @param rdf    file to read from; its file pointer is moved
     * @param pos    absolute offset of the first byte
     * @param length number of bytes to read
     * @return a new array containing exactly {@code length} bytes
     * @throws IOException if seeking or reading fails; {@link EOFException} if the file ends
     *                     before {@code length} bytes were read
     */
    public static byte[] read(RandomAccessFile rdf, int pos, int length) throws IOException {
        rdf.seek(pos);
        byte[] result = new byte[length];
        rdf.readFully(result);
        return result;
    }

    /**
     * Converts audio data to 16 kHz and writes it to {@code path} as a WAV file.
     *
     * <p>Encoding, sample size, channel count, frame size and endianness are taken from the
     * source; only the sample rate (and the matching frame rate) is changed.
     *
     * @param data complete contents of the source audio file
     * @param path destination file; overwritten if it exists
     * @throws IOException if the data cannot be read or the file cannot be written
     * @throws UnsupportedAudioFileException if {@code data} is not a supported audio format
     */
    public static void reSamplingAndSave(byte[] data, String path) throws IOException, UnsupportedAudioFileException {
        // The public AudioSystem API is used instead of the internal com.sun.media.sound
        // reader/writer classes, which are not accessible on modular JDKs.
        try (AudioInputStream audioIn = AudioSystem.getAudioInputStream(new ByteArrayInputStream(data))) {
            AudioFormat srcFormat = audioIn.getFormat();

            // Frame rate is set to the target sample rate as well, so the two stay consistent.
            AudioFormat dstFormat = new AudioFormat(srcFormat.getEncoding(),
                    TARGET_SAMPLE_RATE,
                    srcFormat.getSampleSizeInBits(),
                    srcFormat.getChannels(),
                    srcFormat.getFrameSize(),
                    TARGET_SAMPLE_RATE,
                    srcFormat.isBigEndian());

            try (AudioInputStream convertedIn = AudioSystem.getAudioInputStream(dstFormat, audioIn)) {
                AudioSystem.write(convertedIn, AudioFileFormat.Type.WAVE, new File(path));
            }
        }
    }

    /**
     * Demo entry point: recognizes a fixed sample file, prints the text and then deletes
     * the file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String path = "D:/yuyinshibie/test456.wav";
        File localFile = new File(path);
        try {
            // Run the recognition.
            String text = getWord(path);
            System.out.println("text:" + text);
        } catch (IOException | UnsupportedAudioFileException e) {
            e.printStackTrace();
        } finally {
            // The sample file is removed whether or not recognition succeeded.
            localFile.delete();
        }
    }

}

原文：java 离线中文语音文字识别 - Rolay - 博客园。转载注明出处：https://www.cnblogs.com/rolayblog/p/15237099.html 。原文摘要：项目需要，要实现类似小爱同学的语音控制功能，并且要离线，不能花公司一分钱。第一步就是需……