zoukankan      html  css  js  c++  java
  • 人工智能

    人工智能 - 基于火狐浏览器的语音识别,语音自动回复

    一. 需求工具

    1. 下载安装火狐浏览器

      因为火狐浏览器支持在网页中直接调用麦克风录音(getUserMedia)

    2. 安装Recorder.js

      一个易于使用的浏览器录音库,以 Matt Diamond 的 Recorder.js 为核心

      http://www.chengfeilong.com/recorderjs-info

    3. 百度AI

    4. 图灵机器人

    5. mongoDB(数据库) - pymongo

      可以选择其他数据库

    6. jQuery

    7. Flask框架

    二. 不废话,上代码

    1. index.html

    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <title>我是玩具</title>
    </head>
    <body>
    
    <!-- Audio player; the inline script sets its src to the reply audio file -->
    <p><audio id="player" controls autoplay ></audio></p>
    <!-- Buttons wired to start_reco() / stop_reco() from the inline script -->
    <button onclick="start_reco()">录音</button>
    <button onclick="stop_reco()">发送语音</button>
    <!-- The inline script writes the reply text returned by the server here -->
    <div id="content"></div>
    
    
    </body>
    <script type="text/javascript" src="/static/Recorder.js"></script>
    <script type="text/javascript" src="/static/jquery-3.3.1.min.js"></script>
    <script type="text/javascript">
        // Base URL of the backend that receives uploads and serves reply audio.
        var serv = "http://192.168.11.206:9527";

        // Recorder instance; stays null until create_stream() has run.
        var reco = null;
        // Audio context that the microphone stream gets attached to.
        var audio_context = new AudioContext();

        // Use the unprefixed getUserMedia when present, otherwise fall back to
        // the first vendor-prefixed variant the browser provides.
        navigator.getUserMedia = navigator.getUserMedia
            || navigator.webkitGetUserMedia
            || navigator.mozGetUserMedia
            || navigator.msGetUserMedia;

        // Ask for microphone access: on success the stream is handed to
        // create_stream, on failure the error is logged to the console.
        navigator.getUserMedia(
            {audio: true},
            create_stream,
            function (err) {
                console.log(err)
            }
        );
    
        // Success callback for getUserMedia: turns the microphone stream into a
        // source node on audio_context and builds the global Recorder from it.
        function create_stream(user_media) {
            reco = new Recorder(audio_context.createMediaStreamSource(user_media));
        }
    
    
        // Start capturing audio. When the recorder has not been created yet
        // (reco is still null, e.g. microphone access was denied or is pending)
        // this logs a message and returns instead of throwing on null.
        function start_reco() {
            if (!reco) {
                console.log("recorder is not ready");
                return;
            }
            reco.record();
        }
    
        // Stop recording, export the captured audio as WAV, upload it to
        // serv + "/upload", then play the reply audio and show the reply text.
        function stop_reco() {
            if (!reco) {
                // Recorder was never created (reco is still null), so there is
                // nothing to stop or export.
                console.log("recorder is not ready");
                return;
            }
            reco.stop();

            reco.exportWAV(function (wav_file) {
                console.log(wav_file);
                // Multipart form body: "reco" carries the WAV data the same
                // way an <input type="file"> would.
                var formdata = new FormData();
                formdata.append("reco", wav_file);
                // Plain text field, like <input type="text" name="key">.
                formdata.append("key", "value");
                $.ajax({
                    url: serv + "/upload",
                    type: 'post',
                    // Hand the FormData to the browser untouched so it can set
                    // the multipart content type itself.
                    processData: false,
                    contentType: false,
                    data: formdata,
                    dataType: 'json',
                    success: function (data) {
                        console.log(data);
                        if (data.code == 0) {
                            // Build the URL from serv so the backend address
                            // is configured in exactly one place.
                            document.getElementById("player").src = serv + "/get_file/" + data.filename;
                            document.getElementById("content").innerText = data.content;
                        }
                    }
                });
            });

            // Reset the recorder so the next recording starts empty.
            reco.clear();
        }
    
    </script>
    </html>
    

    2. adiou.py(封装的 百度AI, 图灵机器人 函数)

    from aip import AipSpeech
    from aip import AipNlp
    import os
    from uuid import uuid4
    
    """ 你的 APPID AK SK """
    # Credentials passed to both Baidu AI clients below.
    # NOTE(review): secrets are hard-coded in source; consider loading them
    # from configuration or the environment before sharing this file.
    APP_ID = '15837844'
    API_KEY = '411VNGbuZVbDNZU78LqTzfsV'
    SECRET_KEY = '84AnwR2NARGMqnC6WFnzqQL9WWdWh5bW'
    
    # Speech client: used in this module for recognition (asr) and synthesis.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    # NLP client: used in this module for the simnet similarity check.
    nlp_client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    
    
    def get_file_content(filePath):
        """Convert an audio file to raw PCM with ffmpeg and return the PCM bytes.

        ffmpeg is asked to write mono, 16000 Hz, signed 16-bit little-endian
        samples to ``<filePath>.pcm`` (overwriting an existing file), and that
        file is then read back.

        Args:
            filePath: Path of the source audio file.

        Returns:
            The contents of ``<filePath>.pcm`` as ``bytes``.

        Raises:
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
            OSError: If ffmpeg cannot be started or the PCM file cannot be read.
        """
        import subprocess  # local import keeps this block self-contained

        source = str(filePath)
        pcm_path = f"{source}.pcm"
        # An argument list (no shell) passes the paths to ffmpeg literally, so
        # spaces or shell metacharacters in the path cannot break or alter the
        # command. check=True surfaces a failed conversion immediately.
        subprocess.run(
            [
                "ffmpeg", "-y",
                "-i", source,
                "-acodec", "pcm_s16le",
                "-f", "s16le",
                "-ac", "1",
                "-ar", "16000",
                pcm_path,
            ],
            check=True,
        )
        with open(pcm_path, 'rb') as fp:
            return fp.read()
    
    
    def audio2text(filePath):
        """Recognise the speech in an audio file and return it as text.

        The file is converted to PCM by ``get_file_content`` and submitted to
        the module-level speech ``client`` via ``asr`` as 16000 Hz ``'pcm'``
        with ``dev_pid`` 1536.

        Args:
            filePath: Path of the recorded audio file.

        Returns:
            The first entry of the response's ``"result"`` list.

        Raises:
            RuntimeError: If the response has no (or an empty) ``"result"``;
                the full response is included in the message.
        """
        res = client.asr(get_file_content(filePath), 'pcm', 16000, {
            'dev_pid': 1536,
        })

        candidates = res.get("result")
        if not candidates:
            # Indexing a missing result used to fail with an opaque TypeError;
            # report the service response instead.
            raise RuntimeError(f"speech recognition returned no result: {res}")

        text = candidates[0]
        print(text)

        return text
    
    import requests
    
    
    def to_tuling(text, uid):
        """Send ``text`` to the Tuling robot API and return its text reply.

        Args:
            text: The user's utterance, sent as ``perception.inputText.text``.
            uid: Identifier sent as ``userInfo.userId``.

        Returns:
            The ``values.text`` field of the first entry in the response's
            ``"results"`` list (``None`` when that entry carries no text).

        Raises:
            requests.exceptions.RequestException: On connection problems or
                when the service does not answer within the timeout.
        """
        # Build the payload directly from the arguments instead of filling in
        # placeholder values and overwriting them afterwards.
        data = {
            "perception": {
                "inputText": {
                    "text": text
                }
            },
            "userInfo": {
                "apiKey": "a4c4a668c9f94d0c928544f95a3c44fb",
                "userId": uid
            }
        }
        # Without a timeout a stalled remote service would block the caller
        # indefinitely.
        res = requests.post(
            "http://openapi.tuling123.com/openapi/api/v2",
            json=data,
            timeout=10,
        )
        res_json = res.json()

        text = res_json.get("results")[0].get("values").get("text")
        print(text)
        return text
    
    def my_nlp(text):
        """Choose a reply for ``text``: a canned answer or the Tuling robot's.

        ``text`` is compared with each known question through
        ``nlp_client.simnet``; the first question scoring at least 0.75 wins
        and its canned answer is returned. Otherwise the text is forwarded to
        ``to_tuling`` with the user id ``"open123"``.

        Args:
            text: The recognised user utterance.

        Returns:
            The reply text.
        """
        canned_replies = (
            ("你叫什么名字", "我叫银王八"),
            ("你今年几岁了", "我今年999岁了"),
        )
        for question, answer in canned_replies:
            score = nlp_client.simnet(text, question).get("score")
            # A response without "score" is treated as "no match" so the
            # request still falls through to Tuling instead of failing on a
            # None comparison.
            if score is not None and score >= 0.75:
                return answer

        return to_tuling(text, "open123")
    
    
    def text2audio(text):
        """Synthesise speech for ``text`` and return the target MP3 file name.

        The text is passed to the module-level speech ``client``'s
        ``synthesis`` call and a fresh ``<uuid4>.mp3`` name is generated.
        Binary audio is written to that file in the working directory.

        Args:
            text: The text to speak.

        Returns:
            The generated file name. Note that the name is returned even when
            synthesis produced a dict, in which case no file is written.
        """
        synth_options = {
            'vol': 5,
            'per': 4,
            'spd': 4,
            'pit': 7,
        }
        result = client.synthesis(text, 'zh', 1, synth_options)

        filename = f"{uuid4()}.mp3"

        # On success the call returns binary audio; on failure it returns a
        # dict describing the error, and nothing is written.
        if isinstance(result, dict):
            return filename

        with open(filename, 'wb') as audio_file:
            audio_file.write(result)
        return filename
    

    3. app.py(路由视图, 逻辑)

    	from flask import Flask, render_template, request, jsonify, send_file
    	from uuid import uuid4
    	from adiou import audio2text, text2audio, my_nlp
    	from mongodb import MONGODB
    	from flask_cors import CORS
    	
    	
    	# The Flask application object; run.py imports it from this module.
    	app = Flask(__name__)
    	# Apply CORS to every route ("/*") for any origin ("*").
    	CORS(app,  resources={r"/*": {"origins": "*"}})
    	# NOTE(review): CORS is applied a second time here with credentials
    	# support; presumably both could be one call - confirm how flask-cors
    	# combines repeated initialisation before merging them.
    	CORS(app, supports_credentials=True)
    	
    	
    	@app.route('/')
    	def hello_world():
    	    """Serve the recorder page (the ``index.html`` template) at the root URL."""
    	    print('////')
    	    page = render_template("index.html")
    	    return page
    	
    	
    	@app.route("/upload", methods=["POST"])
    	def upload():
    	    """Handle an uploaded recording and answer with a spoken reply.

    	    Steps: save the uploaded "reco" file as ``<uuid4>.wav``, turn it into
    	    text with ``audio2text``, store that text in ``MONGODB.users``, obtain
    	    a reply with ``my_nlp``, store the reply as well, and synthesise it
    	    with ``text2audio``.

    	    Returns:
    	        JSON ``{"filename", "content", "code": 0}`` on success, where
    	        ``filename`` is the name returned by ``text2audio`` and ``content``
    	        the reply text. When the request has no "reco" file part, JSON with
    	        ``"code": 1`` and HTTP status 400.
    	    """
    	    # The print calls below are the original debug traces, kept unchanged.
    	    print("111")
    	    fi = request.files.get("reco")
    	    print("2222", fi)
    	    if fi is None:
    	        # Without this guard a request lacking the file part failed with
    	        # AttributeError on fi.save() and surfaced as an HTTP 500.
    	        return jsonify({"code": 1, "content": "missing 'reco' file"}), 400
    	    fi_name = f"{uuid4()}.wav"
    	    print(fi_name, "3333")
    	    fi.save(fi_name)

    	    text = audio2text(fi_name)
    	    print(text, "text")
    	    text1 = {"kong": text}
    	    res1 = MONGODB.users.insert_one(text1)
    	    print(res1)

    	    new_text = my_nlp(text)
    	    print(new_text, "new_text")
    	    text2 = {"机器人": new_text}
    	    res2 = MONGODB.users.insert_one(text2)
    	    print(res2)

    	    filename = text2audio(new_text)
    	    print(filename, "filename")

    	    ret = {
    	        "filename": filename,
    	        "content": new_text,
    	        "code": 0,
    	    }

    	    return jsonify(ret)
    	
    	
    	@app.route("/get_file/<filename>")
    	def get_file(filename):
    	    """Serve a synthesised reply audio file by name.

    	    Only names shaped like the ones ``text2audio`` generates
    	    (``<uuid4>.mp3``) are served. Any other name gets a 404, so the route
    	    cannot be used to download other files that sit next to the audio,
    	    such as source files or uploaded recordings.

    	    Args:
    	        filename: File name taken from the URL.

    	    Returns:
    	        The file response, or a ``("file not found", 404)`` tuple when the
    	        name does not have the expected shape.
    	    """
    	    import re  # local import keeps this block self-contained

    	    generated_name = r"[0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}\.mp3"
    	    if not re.fullmatch(generated_name, filename):
    	        return "file not found", 404
    	    return send_file(filename)
    

    4. mongodb.py

    from pymongo import MongoClient
    
    # Client for the MongoDB server at 127.0.0.1, port 27017.
    conn = MongoClient(host="127.0.0.1", port=27017)
    # Handle to the "db3" database; app.py imports it as MONGODB.
    MONGODB = conn["db3"]
    

    5. run.py(Flask启动)

    from app import app
    
    
    if __name__ == '__main__':
        # Start the Flask server on all interfaces, port 9527, in debug mode.
        # NOTE(review): debug mode together with host 0.0.0.0 looks intended
        # for local development only - confirm before exposing this server.
        app.run(host="0.0.0.0", port=9527, debug=True)
  • 相关阅读:
    学习:SilverLight学习资源
    学习:ASP.NET 页生命周期概述(转)
    学习:Sharepoint2010 List View Filter: date
    学习:Linq学习资源
    总结:免费电子书下载地址
    总结:SharePoint Designer 2010 DVWP (1) 概述 XsltListViewWebPart和DataFormWebPart
    总结:SharePoint Designer 2010 DVWP (2) DataFormWebPart 不同类型的Column对应的XSLT写法
    Spread For Web Forms 3.0 依然无可匹敌
    ComponentOne Studio Enterprise 2007 产品集合
    GTP.NET项目管理甘特图模块
  • 原文地址:https://www.cnblogs.com/konghui/p/10667235.html
Copyright © 2011-2022 走看看