zoukankan      html  css  js  c++  java
  • springBoot+vue+百度语音识别

    1.将百度语音识别demo下载下来 并且用maven封装成一个jar包 核心代码如下

    package com.baidu.speech.restapi.asrdemo;
    
    import com.alibaba.fastjson.JSONObject;
    import com.baidu.speech.restapi.asrdemo.common.ConnUtil;
    import com.baidu.speech.restapi.asrdemo.common.DemoException;
    import com.baidu.speech.restapi.asrdemo.common.TokenHolder;
    
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.OutputStream;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.nio.charset.StandardCharsets;
    import java.util.Base64;
    
    /**
     * Static helper around the Baidu speech-recognition REST endpoint.
     *
     * <p>Callers normally use {@link #obtainAsrResult(byte[])}: it fetches an access token,
     * posts the audio as a JSON document and returns the raw JSON answer of the service.
     *
     * <p>NOTE(review): the application key and secret are compiled into the class; consider
     * loading them from configuration instead of source control.
     */
    public class AsrUtil {
        // Upload mode switch kept from the vendor demo (false = JSON upload).
        // It is not consulted by execute(), which always uses the JSON upload.
        private static final boolean METHOD_RAW = false;
    
        // Application key obtained from the Baidu console, e.g. "g8eBUMSokVB1BHGmgxxxxxx"
    //    private final String APP_KEY = "kVcnfD9iW2XVZSMaLMrtLYIz";
        // author's own key (masked)
        private static final String APP_KEY = "XXXXXXXXXXXXXX";
    
        // Application secret obtained from the Baidu console, e.g. "94dc99566550d87f8fa8ece112xxxxx"
    //    private final String SECRET_KEY = "O9o1O213UgG5LFn0bDGNtoRN3VWl2du6";
        // author's own secret (masked)
        private static final String SECRET_KEY = "XXXXXXXXXXXX";
    
        // Audio file used by the raw upload path
        private static final String FILENAME = "16k.pcm";
    
        // File format taken from the last three characters of FILENAME.
        // The service supports pcm/wav/amr; the "pro" edition additionally supports m4a.
        private static final String FORMAT = FILENAME.substring(FILENAME.length() - 3);
    
        // Client identifier sent with every request
        private static final String CUID = "1234567JAVA";
    
        // Sample rate, fixed value
        private static final int RATE = 16000;
    
        // Endpoint, model id and token scope; assigned in the static initializer below.
        private static final String URL;
    
        private static final int DEV_PID;
    
        //private int LM_ID; // uncomment when testing the self-training platform
    
        private static final String SCOPE;
    
        //  Standard edition parameters
    //    {
    //        URL = "http://vop.baidu.com/server_api"; // may be changed to https
    //        //  1537 = Mandarin with the input-method model; see the documentation for other languages
    //        DEV_PID = 1537;
    //        SCOPE = "audio_voice_assistant_get";
    //    }
    
        // Self-training platform parameters
        /*{
            // After a self-trained model goes online the console shows e.g. "pid:8001,modelid:1234";
            // use those values as dev_pid=8001, lm_id=1234
            DEV_PID = 8001;
            LM_ID = 1234;
        }*/
    
        // "Pro" (fast) edition parameters
        static {
            // NOTE(review): plain HTTP sends the token and the audio unencrypted;
            // the original comment states the URL may be switched to https.
            URL =   "http://vop.baidu.com/pro_api"; // may be changed to https
            DEV_PID = 80001;
            SCOPE = "brain_enhanced_asr";
        }
    
        /* Skip the scope check; very old applications may not have one
        {
            SCOPE = null;
        }
        */
    
        /**
         * Recognizes the given audio and prints the service answer to standard output.
         *
         * @param bytes audio data to recognize; must not be {@code null}
         * @return the JSON string returned by the recognition service
         * @throws IOException   if the HTTP exchange fails
         * @throws DemoException if the token or response helpers report an error
         */
        public static String obtainAsrResult(byte[] bytes) throws IOException, DemoException {
            String resultJson = execute(bytes);
            System.out.println("识别结束:结果是:");
            System.out.println(resultJson);
            return resultJson;
        }
    
    
        /**
         * Obtains a fresh access token and submits the audio through the JSON upload.
         *
         * @param bytes audio data to recognize; must not be {@code null}
         * @return the JSON string returned by the recognition service
         * @throws IOException   if the HTTP exchange fails
         * @throws DemoException if the token or response helpers report an error
         */
        public static String execute(byte[] bytes) throws IOException, DemoException {
            TokenHolder holder = new TokenHolder(APP_KEY, SECRET_KEY, SCOPE);
            holder.resfresh();
            return runJsonPostMethod(holder.getToken(), bytes);
        }
    
        /**
         * Uploads the content of {@link #FILENAME} as the raw request body, with the request
         * parameters carried in the query string. Not called by {@link #execute(byte[])}.
         */
        private static String runRawPostMethod(String token) throws IOException, DemoException {
            String url2 = URL + "?cuid=" + ConnUtil.urlEncode(CUID) + "&dev_pid=" + DEV_PID + "&token=" + token;
            // Use the following URL instead when testing the self-training platform
            //String url2 = URL + "?cuid=" + ConnUtil.urlEncode(CUID) + "&dev_pid=" + DEV_PID + "&lm_id="+ LM_ID + "&token=" + token;
            String contentTypeStr = "audio/" + FORMAT + "; rate=" + RATE;
            byte[] content = getFileContent(FILENAME);
            HttpURLConnection conn = (HttpURLConnection) new URL(url2).openConnection();
            conn.setConnectTimeout(5000);
            conn.setRequestProperty("Content-Type", contentTypeStr);
            conn.setRequestMethod("POST");
            conn.setDoOutput(true);
            // try-with-resources closes the request stream even when the write fails
            try (OutputStream out = conn.getOutputStream()) {
                out.write(content);
            }
            System.out.println("url is " + url2);
            System.out.println("header is  " + "Content-Type :" + contentTypeStr);
            return ConnUtil.getResponseString(conn);
        }
    
        /**
         * Posts the audio, base64-encoded, inside a JSON document and returns the service answer.
         *
         * @param token access token to authenticate the request
         * @param bytes audio data to recognize; must not be {@code null}
         * @return the JSON string returned by the recognition service
         * @throws IllegalArgumentException if {@code bytes} is {@code null}
         * @throws IOException              if the HTTP exchange fails
         * @throws DemoException            if the response helper reports an error
         */
        public static String runJsonPostMethod(String token,byte[] bytes) throws DemoException, IOException {
            if (bytes == null) {
                throw new IllegalArgumentException("bytes must not be null");
            }
    
            JSONObject params = new JSONObject();
            params.put("dev_pid", DEV_PID);
            //params.put("lm_id",LM_ID); // uncomment when testing the self-training platform
            // NOTE(review): the format is fixed to "wav" regardless of FORMAT; confirm it matches
            // the audio the callers actually pass in.
            params.put("format", "wav");
            params.put("rate", RATE);
            params.put("token", token);
            params.put("cuid", CUID);
            params.put("channel", "1");
            params.put("len", bytes.length);
            params.put("speech", base64Encode(bytes));
    
            HttpURLConnection conn = (HttpURLConnection) new URL(URL).openConnection();
            conn.setConnectTimeout(5000);
            conn.setRequestMethod("POST");
            conn.setRequestProperty("Content-Type", "application/json; charset=utf-8");
            conn.setDoOutput(true);
            // Encode explicitly as UTF-8 so the body matches the declared charset on every platform.
            try (OutputStream out = conn.getOutputStream()) {
                out.write(params.toString().getBytes(StandardCharsets.UTF_8));
            }
            String result = ConnUtil.getResponseString(conn);
    
            // Replace the (large) audio payload with a placeholder before printing the parameters.
            params.put("speech", "base64Encode(getFileContent(FILENAME))");
            System.out.println("url is : " + URL);
            System.out.println("params is :" + params.toString());
    
            return result;
        }
    
        /**
         * Reads a whole file into memory.
         *
         * @throws DemoException if the file does not exist or is not readable
         * @throws IOException   if reading or closing the file fails
         */
        private static byte[] getFileContent(String filename) throws DemoException, IOException {
            File file = new File(filename);
            if (!file.canRead()) {
                System.err.println("文件不存在或者不可读: " + file.getAbsolutePath());
                throw new DemoException("file cannot read: " + file.getAbsolutePath());
            }
            try (FileInputStream is = new FileInputStream(file)) {
                return ConnUtil.getInputStreamContent(is);
            }
        }
    
        /** Encodes the bytes as a base64 string using the JDK 8 encoder. */
        private static String base64Encode(byte[] content) {
            return Base64.getEncoder().encodeToString(content);
        }
    }

    调用 obtainAsrResult 方法即可获得识别后字符串

    2.使用 ffmpeg 对音频进行转码 

      下载ffmpeg 并且将bin目录设置到环境变量即可

    代码如下:

    @Override
        public String aiAsrTest(MultipartFile file) {
            String r = null;
            try {
                byte[] bytes = file.getBytes();
                // 对上传文件进行转码处理
                String path = "D:\bwbd\temp\";
                File dir = new File(path);
                if (dir == null || !dir.exists()) {
                    dir.mkdirs();
                }
                File file1 = new File(path + "16k1.wav");
                file.transferTo(file1);
                File file2 = new File(path + "16k2.wav");
                try {
                    logger.info("========音频格式转换======");
                    Runtime runtime = Runtime.getRuntime();
                    String cutCmd = "ffmpeg -y  -i " + file1 + "  -acodec pcm_s16le -f s16le -ac 1 -ar 16000 " + file2;
                    Process proce = runtime.exec(cutCmd);
                    InputStream erro = proce.getErrorStream();
                    byte[] a = new byte[1024];
                    int j = 0;
                    while ((j = erro.read(a)) > -1) {
    //                logger.info(new String(a));
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                    logger.info("=========文件 "+ file + " 正在转换出现异常");
                }
                byte[] fileByteArray = FileUtil.getFileByteArray(file2);
    
                String result = AsrUtil.obtainAsrResult(fileByteArray);
                log.info("===ai接口返回:" + result);
                JSONObject jsonObject = JSONObject.parseObject(result);
                Object result1 = jsonObject.get("result");
                if (null != result1) {
                    List<String> strings = JSONArray.parseArray(result1.toString(), String.class);
                    r = strings.get(0);
                    r = new String(r.getBytes(),"utf-8");
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
            return r;
        }

    最后附上 vue 实现录音功能的代码:录音完成后上传到后台,获得识别后的字符串

    1.methods域内定义以下方法

    initAudio(){
          // this.$nextTick(() => {
            // try {
            //   // <!-- 检查是否能够调用麦克风 -->
            //   window.AudioContext = window.AudioContext || window.webkitAudioContext;
            //   navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia;
            //   window.URL = window.URL || window.webkitURL;
            //
            //   audio_context = new AudioContext;
            //   console.log('navigator.getUserMedia ' + (navigator.getUserMedia ? 'available.' : 'not present!'));
            // } catch (e) {
            //   alert('No web audio support in this browser!');
            // }
          var _this = this;
          navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia;
          navigator.getUserMedia({audio: true}, function (stream) {
            let recorder = new HZRecorder(stream);
            _this.recorder = recorder;
            console.log('初始化完成');
          }, function(e) {
            console.log('No live audio input: ' + e);
          });
          // })
        },
          readyOriginal () {
            if (!this.isVoice) {
              // <!-- 开启录音 -->
              this.recorder && this.recorder.start();
              this.isVoice = true
            } else {
              this.isVoice = false
              // 结束录音
              this.recorder && this.recorder.stop();
              setTimeout(()=> {
                // <!-- 录音上传 --> https://localhost/bwbd/fg/aiAsrTest
                var mp3Blob = this.recorder.upload();
                var fd = new FormData();
                fd.append('file', mp3Blob);
                // this.$axios.post('https://localhost/api/webUser/insertUserLog',allJoin).then(
                //   res=>{
                //     if(res.data.data===true){
                //       return
                //     }
                //   }
                // )
                this.$axios.post('http://localhost/bwbd/fg/aiAsrTest',fd).then((res) => {
                  // 这里做登录拦截
                  if (res.data.status === 200) {
                    console.log('保存成功');
                    console.log(res.data.data)
                  } else {
                    this.returnmsg = '上传失败'
                  }
                })
              },1000)
            }
          },

    html 元素调用 readyOriginal 方法,并在 mounted 钩子内调用 initAudio 方法

    记得引入js文件

    import { HZRecorder} from '../utils/HZRecorder.js';
    /**
     * Records mono audio from a MediaStream and encodes it as a PCM WAV Blob.
     *
     * Must be invoked with `new`. Each call to start() begins a fresh recording.
     *
     * @param {MediaStream} stream  stream to record from
     * @param {Object} [config]     optional settings
     * @param {number} [config.sampleBits=16]    output bits per sample, 8 or 16
     * @param {number} [config.sampleRate=16000] requested output sample rate in Hz
     */
    function HZRecorder(stream, config) {
      config = config || {};
      config.sampleBits = config.sampleBits || 16;   // bits per sample: 8 or 16
      config.sampleRate = config.sampleRate || 16000;  // sample rate, 16 kHz by default
    
      var AudioContextCtor = window.AudioContext || window.webkitAudioContext;
      var context = new AudioContextCtor();
      var audioInput = context.createMediaStreamSource(stream);
      var createScript = context.createScriptProcessor || context.createJavaScriptNode;
      var recorder = createScript.apply(context, [4096, 1, 1]);
    
      var audioData = {
        size: 0,                               // number of recorded samples
        buffer: [],                            // recorded chunks
        inputSampleRate: context.sampleRate,   // capture sample rate
        inputSampleBits: 16,                   // capture bits per sample
        outputSampleRate: config.sampleRate,   // requested output sample rate
        outputSampleBits: config.sampleBits,   // requested output bits per sample
        // Drops everything recorded so far.
        clear: function () {
          this.buffer = [];
          this.size = 0;
        },
        // Stores a copy of one captured chunk.
        input: function (data) {
          this.buffer.push(new Float32Array(data));
          this.size += data.length;
        },
        // Merges all chunks and resamples them down to the output rate.
        compress: function () {
          var merged = new Float32Array(this.size);
          var offset = 0;
          for (var i = 0; i < this.buffer.length; i++) {
            merged.set(this.buffer[i], offset);
            offset += this.buffer[i].length;
          }
          // Never upsample: encodeWAV declares min(input, output) as the rate in that case.
          if (this.inputSampleRate <= this.outputSampleRate) {
            return merged;
          }
          // Linear interpolation at a fractional step, so the sample count matches the declared
          // rate even when the ratio is not an integer (e.g. 44100 -> 16000).
          var outLength = Math.floor(merged.length * this.outputSampleRate / this.inputSampleRate);
          var result = new Float32Array(outLength);
          var last = merged.length - 1;
          for (var k = 0; k < outLength; k++) {
            var pos = k * this.inputSampleRate / this.outputSampleRate;
            var lo = Math.floor(pos);
            var hi = Math.min(lo + 1, last);
            result[k] = merged[lo] + (merged[hi] - merged[lo]) * (pos - lo);
          }
          return result;
        },
        // Builds a 44-byte RIFF/WAVE header followed by the PCM samples.
        encodeWAV: function () {
          var sampleRate = Math.min(this.inputSampleRate, this.outputSampleRate);
          var sampleBits = Math.min(this.inputSampleBits, this.outputSampleBits);
          var samples = this.compress();
          var dataLength = samples.length * (sampleBits / 8);
          var buffer = new ArrayBuffer(44 + dataLength);
          var data = new DataView(buffer);
    
          var channelCount = 1; // mono
          var offset = 0;
    
          var writeString = function (str) {
            for (var i = 0; i < str.length; i++) {
              data.setUint8(offset + i, str.charCodeAt(i));
            }
          };
    
          // RIFF chunk identifier
          writeString('RIFF'); offset += 4;
          // bytes from here to the end of the file, i.e. file size - 8
          data.setUint32(offset, 36 + dataLength, true); offset += 4;
          // WAVE format marker
          writeString('WAVE'); offset += 4;
          // format chunk marker
          writeString('fmt '); offset += 4;
          // format chunk length, 0x10 = 16
          data.setUint32(offset, 16, true); offset += 4;
          // audio format: 1 = PCM
          data.setUint16(offset, 1, true); offset += 2;
          // channel count
          data.setUint16(offset, channelCount, true); offset += 2;
          // sample rate
          data.setUint32(offset, sampleRate, true); offset += 4;
          // byte rate = channels * sample rate * bits per sample / 8
          data.setUint32(offset, channelCount * sampleRate * (sampleBits / 8), true); offset += 4;
          // block align = channels * bits per sample / 8
          data.setUint16(offset, channelCount * (sampleBits / 8), true); offset += 2;
          // bits per sample
          data.setUint16(offset, sampleBits, true); offset += 2;
          // data chunk marker
          writeString('data'); offset += 4;
          // data chunk length, i.e. file size - 44
          data.setUint32(offset, dataLength, true); offset += 4;
          // sample data
          var s, val;
          if (sampleBits === 8) {
            for (var m = 0; m < samples.length; m++, offset++) {
              s = Math.max(-1, Math.min(1, samples[m]));
              val = s < 0 ? s * 0x8000 : s * 0x7FFF;
              val = parseInt(255 / (65535 / (val + 32768)));
              // the value is in 0..255, so it is written as an unsigned byte
              data.setUint8(offset, val);
            }
          } else {
            for (var n = 0; n < samples.length; n++, offset += 2) {
              s = Math.max(-1, Math.min(1, samples[n]));
              data.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
            }
          }
    
          return new Blob([data], { type: 'audio/wav' });
        }
      };
    
      // Start recording; earlier audio is discarded so each recording stands alone.
      this.start = function () {
        audioData.clear();
        audioInput.connect(recorder);
        recorder.connect(context.destination);
      };
    
      // Stop recording
      this.stop = function () {
        recorder.disconnect();
      };
    
      // Stop and return the recording as a WAV Blob
      this.getBlob = function () {
        this.stop();
        return audioData.encodeWAV();
      };
    
      // Play the recording back through the given audio element
      this.play = function (audio) {
        var blob = this.getBlob();
        audio.src = window.URL.createObjectURL(blob);
      };
    
      // Return the WAV Blob for uploading
      this.upload = function () {
        return this.getBlob();
      };
    
      // Capture callback: store the first channel of every processed buffer
      recorder.onaudioprocess = function (e) {
        audioData.input(e.inputBuffer.getChannelData(0));
      };
    
      return this;
    }
    
    export {
      HZRecorder
    }
  • 相关阅读:
    setlocale set the current locale
    测试一个目录下的文件共有多少行
    ping中用到的校验和算法
    atomic integer operations P176
    我要理解1为什么是0xffffffff,所以写了下面的程序理解。
    贝叶斯网络中一个节点的类
    bash 的浮点除法
    shell(1)
    AndroidBroadcast详解与汇总
    AndroidActivity详解与汇总
  • 原文地址:https://www.cnblogs.com/guanxiaohe/p/13300252.html
Copyright © 2011-2022 走看看