zoukankan      html  css  js  c++  java
  • 人工智障

    1.百度的语音合成

    具体配置参考 http://ai.baidu.com/docs#/TTS-Online-Python-SDK/top

    from aip import AipSpeech
    
    """ 你的 APPID AK SK """
    import os

    # Baidu AI credentials (APP ID / API key / secret key). Environment
    # variables take precedence so real credentials need not live in the
    # source; the literals are the tutorial's sample values.
    APP_ID = os.environ.get('BAIDU_APP_ID', '15421010')
    API_KEY = os.environ.get('BAIDU_API_KEY', 'YSKSaidmdyWkfhnhWezCeTqi')
    SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY', 'EXfzsG9tr6fI6U4KPz28XQmXa8MIBKMz')

    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    result = client.synthesis('来到这个世界,天使都会犯错', 'zh', 1, {
        'vol': 5,
    })

    # On success the SDK returns the audio as bytes; on failure it returns a
    # dict describing the error. Report the failure instead of ending silently.
    if isinstance(result, dict):
        print(f"speech synthesis failed: {result}")
    else:
        with open('audio.mp3', 'wb') as f:
            f.write(result)

    2.百度的语音识别

    具体配置参考 http://ai.baidu.com/docs#/ASR-Online-Python-SDK/top

    import os
    from aip import AipSpeech
    
    """ 你的 APPID AK SK """
    # Baidu AI credentials (APP ID / API key / secret key). Environment
    # variables take precedence so real credentials need not live in the
    # source; the literals are the tutorial's sample values.
    APP_ID = os.environ.get('BAIDU_APP_ID', '15421010')
    API_KEY = os.environ.get('BAIDU_API_KEY', 'YSKSaidmdyWkfhnhWezCeTqi')
    SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY', 'EXfzsG9tr6fI6U4KPz28XQmXa8MIBKMz')

    # Speech client used by the recognition call below.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    
    # 读取文件
    def get_file_content(filePath):
        """Convert an audio file to raw PCM with ffmpeg and return the bytes.

        The converted audio is written next to the input with its extension
        replaced by ``.pcm`` (an existing file is overwritten, ``-y``) and is
        then read back in full. Requires the ``ffmpeg`` executable.

        Args:
            filePath: Path of the source audio file.

        Returns:
            bytes: Signed 16-bit little-endian samples, one channel, 16000 Hz
            (the format requested from ffmpeg).

        Raises:
            FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
            subprocess.CalledProcessError: If ffmpeg exits with an error.
        """
        import subprocess  # local import: the surrounding script only imports os

        # splitext strips only the final extension, so dots elsewhere in the
        # path ("./clip.m4a", "take.1.m4a") no longer truncate the output name.
        pcm_path = os.path.splitext(filePath)[0] + ".pcm"
        # An argument list (no shell) keeps spaces and shell metacharacters in
        # the path from breaking or injecting into the command; check=True
        # avoids reading a stale or missing .pcm after a failed conversion.
        subprocess.run(
            ["ffmpeg", "-y", "-i", filePath,
             "-acodec", "pcm_s16le", "-f", "s16le",
             "-ac", "1", "-ar", "16000", pcm_path],
            check=True,
        )
        with open(pcm_path, 'rb') as fp:
            return fp.read()
    
    # 识别本地文件
    # Transcribe a local recording: convert it to PCM, submit it to the
    # speech-recognition service and show the raw response.
    pcm_audio = get_file_content('jrshdls.m4a')
    asr_options = {'dev_pid': 1536}
    res = client.asr(pcm_audio, 'pcm', 16000, asr_options)

    print(res)

    3.两个结合

    import os
    import time
    from aip import AipSpeech
    
    """ 你的 APPID AK SK """
    # Baidu AI credentials (APP ID / API key / secret key). Environment
    # variables take precedence so real credentials need not live in the
    # source; the literals are the tutorial's sample values.
    APP_ID = os.environ.get('BAIDU_APP_ID', '15421010')
    API_KEY = os.environ.get('BAIDU_API_KEY', 'YSKSaidmdyWkfhnhWezCeTqi')
    SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY', 'EXfzsG9tr6fI6U4KPz28XQmXa8MIBKMz')

    # Speech client shared by audio2text and text2audio below.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    
    
    # 读取文件
    def get_file_content(filePath):
        """Convert an audio file to raw PCM with ffmpeg and return the bytes.

        The converted audio is written next to the input with its extension
        replaced by ``.pcm`` (an existing file is overwritten, ``-y``) and is
        then read back in full. Requires the ``ffmpeg`` executable.

        Args:
            filePath: Path of the source audio file.

        Returns:
            bytes: Signed 16-bit little-endian samples, one channel, 16000 Hz
            (the format requested from ffmpeg).

        Raises:
            FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
            subprocess.CalledProcessError: If ffmpeg exits with an error.
        """
        import subprocess  # local import: the surrounding script only imports os

        # splitext strips only the final extension, so dots elsewhere in the
        # path ("./clip.m4a", "take.1.m4a") no longer truncate the output name.
        pcm_path = os.path.splitext(filePath)[0] + ".pcm"
        # An argument list (no shell) keeps spaces and shell metacharacters in
        # the path from breaking or injecting into the command; check=True
        # avoids reading a stale or missing .pcm after a failed conversion.
        subprocess.run(
            ["ffmpeg", "-y", "-i", filePath,
             "-acodec", "pcm_s16le", "-f", "s16le",
             "-ac", "1", "-ar", "16000", pcm_path],
            check=True,
        )
        with open(pcm_path, 'rb') as fp:
            return fp.read()
    
    
    def audio2text(filepath):
        """Recognise the speech in an audio file and return the first transcript.

        The file is converted to PCM by ``get_file_content`` and submitted
        through the module-level ``client`` with ``dev_pid`` 1536. The
        transcript is also printed.

        Args:
            filepath: Path of the recording to transcribe.

        Returns:
            The first entry of the response's ``result`` list.

        Raises:
            RuntimeError: If the response has no ``result`` entries.
        """
        res = client.asr(get_file_content(filepath), 'pcm', 16000, {
            'dev_pid': 1536,
        })
        candidates = res.get("result")
        if not candidates:
            # Indexing a missing result used to fail with an opaque
            # "'NoneType' object is not subscriptable"; include the response.
            raise RuntimeError(f"speech recognition failed: {res}")
        transcript = candidates[0]
        print(transcript)
        return transcript
    
    
    def text2audio(text):
        """Synthesise ``text`` to speech and save it as an MP3 in the working directory.

        Args:
            text: Text to speak; sent through the module-level ``client``
                with language ``'zh'`` and volume 5.

        Returns:
            str: Name of the written file, built from ``time.time()`` plus ``.mp3``.

        Raises:
            RuntimeError: If synthesis fails (the SDK signals failure by
                returning a dict instead of audio bytes).
        """
        filename = f"{time.time()}.mp3"
        result = client.synthesis(text, 'zh', 1, {
            'vol': 5,
        })
        # Audio comes back as bytes; a dict means the request failed. Raise
        # rather than return the name of a file that was never written.
        if isinstance(result, dict):
            raise RuntimeError(f"speech synthesis failed: {result}")
        with open(filename, 'wb') as f:
            f.write(result)
        return filename
    
    
    # Round trip: transcribe the sample recording and speak the transcript back.
    filename = text2audio(audio2text("jrshdls.m4a"))

    # The file name itself is passed to the shell.
    # NOTE(review): presumably relies on the OS opening .mp3 files by
    # association (e.g. Windows) - confirm on other platforms.
    os.system(filename)

    4.图灵机器人的简单使用

    具体参考 https://www.kancloud.cn/turing/www-tuling123-com/718227

    import requests
    
    
    # Request body for the Tuling v2 API: one text input plus the caller's
    # API key and a user id.
    args = {
        "reqType":0,
        "perception": {
            "inputText": {
                "text": "附近的酒店"
            }
        },
        "userInfo": {
            "apiKey": "08a682c47e334a11bd99cbf093930b63",
            "userId": "1"
        }
    }

    url = "http://openapi.tuling123.com/openapi/api/v2"

    # requests waits indefinitely by default; bound the wait so a stalled
    # server cannot hang the script.
    res = requests.post(url, json=args, timeout=10)

    print(res.json())

    5.人工智障的简单应用

    import os
    import time
    from aip import AipSpeech, AipNlp  # AipNlp: natural language processing client
    
    """ 你的 APPID AK SK """
    # Baidu AI credentials (APP ID / API key / secret key). Environment
    # variables take precedence so real credentials need not live in the
    # source; the literals are the tutorial's sample values.
    APP_ID = os.environ.get('BAIDU_APP_ID', '15421010')
    API_KEY = os.environ.get('BAIDU_API_KEY', 'YSKSaidmdyWkfhnhWezCeTqi')
    SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY', 'EXfzsG9tr6fI6U4KPz28XQmXa8MIBKMz')

    # Speech client (recognition and synthesis) and natural-language client,
    # both built from the same credentials.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    
    
    # 读取文件
    def get_file_content(filePath):
        """Convert an audio file to raw PCM with ffmpeg and return the bytes.

        The converted audio is written next to the input with its extension
        replaced by ``.pcm`` (an existing file is overwritten, ``-y``) and is
        then read back in full. Requires the ``ffmpeg`` executable.

        Args:
            filePath: Path of the source audio file.

        Returns:
            bytes: Signed 16-bit little-endian samples, one channel, 16000 Hz
            (the format requested from ffmpeg).

        Raises:
            FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
            subprocess.CalledProcessError: If ffmpeg exits with an error.
        """
        import subprocess  # local import: the surrounding script only imports os

        # splitext strips only the final extension, so dots elsewhere in the
        # path ("./clip.m4a", "take.1.m4a") no longer truncate the output name.
        pcm_path = os.path.splitext(filePath)[0] + ".pcm"
        # An argument list (no shell) keeps spaces and shell metacharacters in
        # the path from breaking or injecting into the command; check=True
        # avoids reading a stale or missing .pcm after a failed conversion.
        subprocess.run(
            ["ffmpeg", "-y", "-i", filePath,
             "-acodec", "pcm_s16le", "-f", "s16le",
             "-ac", "1", "-ar", "16000", pcm_path],
            check=True,
        )
        with open(pcm_path, 'rb') as fp:
            return fp.read()
    
    
    def audio2text(filepath):
        """Recognise the speech in an audio file and return the first transcript.

        The file is converted to PCM by ``get_file_content`` and submitted
        through the module-level ``client`` with ``dev_pid`` 1536. The
        transcript is also printed.

        Args:
            filepath: Path of the recording to transcribe.

        Returns:
            The first entry of the response's ``result`` list.

        Raises:
            RuntimeError: If the response has no ``result`` entries.
        """
        res = client.asr(get_file_content(filepath), 'pcm', 16000, {
            'dev_pid': 1536,
        })
        candidates = res.get("result")
        if not candidates:
            # Indexing a missing result used to fail with an opaque
            # "'NoneType' object is not subscriptable"; include the response.
            raise RuntimeError(f"speech recognition failed: {res}")
        transcript = candidates[0]
        print(transcript)
        return transcript
    
    
    def text2audio(text):
        """Synthesise ``text`` to speech and save it as an MP3 in the working directory.

        Args:
            text: Text to speak; sent through the module-level ``client``
                with language ``'zh'`` and volume 5.

        Returns:
            str: Name of the written file, built from ``time.time()`` plus ``.mp3``.

        Raises:
            RuntimeError: If synthesis fails (the SDK signals failure by
                returning a dict instead of audio bytes).
        """
        filename = f"{time.time()}.mp3"
        result = client.synthesis(text, 'zh', 1, {
            'vol': 5,
        })
        # Audio comes back as bytes; a dict means the request failed. Raise
        # rather than return the name of a file that was never written.
        if isinstance(result, dict):
            raise RuntimeError(f"speech synthesis failed: {result}")
        with open(filename, 'wb') as f:
            f.write(result)
        return filename
    
    # print(nlp.simnet("你叫什么名字", "你的名字是什么"))

    text = audio2text("jrshdls.m4a")
    # simnet reports how similar two strings are; 0.68 is the cut-off used
    # here to decide that the utterance matches the reference phrase.
    similarity = nlp.simnet("时间的多少", text).get("score")
    # A response without "score" yields None, which cannot be compared with a
    # float; treat it as "no match" instead of crashing with a TypeError.
    # NOTE(review): the reference phrase asks about time while the canned
    # reply gives a name - confirm which one is intended.
    if similarity is not None and similarity >= 0.68:
        text = "我的名字是小明"
    else:
        text = "我不知道你在说什么"
    filename = text2audio(text)

    os.system(filename)

     6.如何在浏览器中实现图灵机器人的对话

    baidu_ai.py

    import os
    import time
    from aip import AipSpeech, AipNlp
    
    """ 你的 APPID AK SK """
    # Baidu AI credentials (APP ID / API key / secret key). Environment
    # variables take precedence so real credentials need not live in the
    # source; the literals are the tutorial's sample values.
    APP_ID = os.environ.get('BAIDU_APP_ID', '15421010')
    API_KEY = os.environ.get('BAIDU_API_KEY', 'YSKSaidmdyWkfhnhWezCeTqi')
    SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY', 'EXfzsG9tr6fI6U4KPz28XQmXa8MIBKMz')

    # Speech client (recognition and synthesis) and natural-language client,
    # both built from the same credentials.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    
    
    # 读取文件
    def get_file_content(filePath):
        """Convert an audio file to raw PCM with ffmpeg and return the bytes.

        The converted audio is written next to the input with its extension
        replaced by ``.pcm`` (an existing file is overwritten, ``-y``) and is
        then read back in full. Requires the ``ffmpeg`` executable.

        Args:
            filePath: Path of the source audio file.

        Returns:
            bytes: Signed 16-bit little-endian samples, one channel, 16000 Hz
            (the format requested from ffmpeg).

        Raises:
            FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
            subprocess.CalledProcessError: If ffmpeg exits with an error.
        """
        import subprocess  # local import: this module only imports os at the top

        # splitext strips only the final extension, so dots elsewhere in the
        # path ("./clip.m4a", "take.1.m4a") no longer truncate the output name.
        pcm_path = os.path.splitext(filePath)[0] + ".pcm"
        # An argument list (no shell) keeps spaces and shell metacharacters in
        # the path from breaking or injecting into the command; check=True
        # avoids reading a stale or missing .pcm after a failed conversion.
        subprocess.run(
            ["ffmpeg", "-y", "-i", filePath,
             "-acodec", "pcm_s16le", "-f", "s16le",
             "-ac", "1", "-ar", "16000", pcm_path],
            check=True,
        )
        with open(pcm_path, 'rb') as fp:
            return fp.read()
    
    
    def audio2text(filepath):
        """Recognise the speech in an audio file and return the first transcript.

        The file is converted to PCM by ``get_file_content`` and submitted
        through the module-level ``client`` with ``dev_pid`` 1536. The
        transcript is also printed.

        Args:
            filepath: Path of the recording to transcribe.

        Returns:
            The first entry of the response's ``result`` list.

        Raises:
            RuntimeError: If the response has no ``result`` entries.
        """
        res = client.asr(get_file_content(filepath), 'pcm', 16000, {
            'dev_pid': 1536,
        })
        candidates = res.get("result")
        if not candidates:
            # Indexing a missing result used to fail with an opaque
            # "'NoneType' object is not subscriptable"; include the response.
            raise RuntimeError(f"speech recognition failed: {res}")
        transcript = candidates[0]
        print(transcript)
        return transcript
    
    
    def text2audio(text):
        """Synthesise ``text`` to speech and save it as an MP3 in the working directory.

        Args:
            text: Text to speak; sent through the module-level ``client``
                with language ``'zh'`` and volume 5.

        Returns:
            str: Name of the written file, built from ``time.time()`` plus ``.mp3``.

        Raises:
            RuntimeError: If synthesis fails (the SDK signals failure by
                returning a dict instead of audio bytes).
        """
        filename = f"{time.time()}.mp3"
        result = client.synthesis(text, 'zh', 1, {
            'vol': 5,
        })
        # Audio comes back as bytes; a dict means the request failed. Raise
        # rather than return the name of a file that was never written.
        if isinstance(result, dict):
            raise RuntimeError(f"speech synthesis failed: {result}")
        with open(filename, 'wb') as f:
            f.write(result)
        return filename
    
    
    def to_tuling(text):
        """Send ``text`` to the Tuling chatbot API and return its text reply.

        Args:
            text: The user's utterance.

        Returns:
            The ``values.text`` field of the first result that carries one.

        Raises:
            RuntimeError: If the response contains no text reply.
        """
        import requests

        args = {
            "reqType": 0,
            "perception": {
                "inputText": {
                    "text": text
                }
            },
            "userInfo": {
                "apiKey": "08a682c47e334a11bd99cbf093930b63",
                "userId": "1"
            }
        }

        url = "http://openapi.tuling123.com/openapi/api/v2"
        # requests waits indefinitely by default; bound the wait so a stalled
        # upstream cannot hang the caller.
        res = requests.post(url, json=args, timeout=10)
        payload = res.json()
        # "results" may be absent and an entry may carry no text, so scan for
        # the first usable reply instead of indexing [0] and chaining .get().
        for item in payload.get("results") or []:
            reply = (item.get("values") or {}).get("text")
            if reply:
                return reply
        raise RuntimeError(f"Tuling returned no text reply: {payload}")

    app.py

    from flask import Flask, request, render_template, jsonify, send_file
    import baidu_ai
    from uuid import uuid4
    
    # Flask application object; the @app.route decorators below register their handlers on it.
    app = Flask(__name__)
    
    
    @app.route("/")
    def index():
        """Serve the recorder page rendered from the ``index.html`` template."""
        page = render_template("index.html")
        return page
    
    
    @app.route("/ai", methods=["POST"])
    def ai():
        """Answer a spoken question.

        Expects a multipart upload with the recording in the ``record`` field.
        The recording is transcribed, the transcript is sent to the chatbot,
        and the reply is synthesised to an audio file.

        Returns:
            JSON ``{"filename": <answer file>}`` on success, or a JSON error
            with status 400 when the ``record`` field is missing.
        """
        import os  # local import: this module does not import os at the top

        # 1. Save the uploaded recording.
        audio = request.files.get("record")
        if audio is None:
            # Without this guard a missing upload failed on audio.save()
            # with an AttributeError (HTTP 500).
            return jsonify({"error": "missing 'record' file field"}), 400
        filename = f"{uuid4()}.wav"
        audio.save(filename)
        try:
            # 2. Convert the recording to PCM and send it for speech recognition.
            q_text = baidu_ai.audio2text(filename)
        finally:
            # The upload and the .pcm derived from it are only needed for
            # recognition; remove them so they do not pile up per request.
            for leftover in (filename, os.path.splitext(filename)[0] + ".pcm"):
                try:
                    os.remove(leftover)
                except OSError:
                    # Best-effort cleanup: the file may never have been created.
                    pass

        # 3. Hand the recognised question to the chatbot to obtain an answer.
        a_text = baidu_ai.to_tuling(q_text)

        # 4. Synthesise the answer into an audio file.
        a_file = baidu_ai.text2audio(a_text)

        # 5. Tell the front end which file to play.
        return jsonify({"filename": a_file})
    
    
    @app.route("/get_audio/<filename>")
    def get_audio(filename):
        """Serve a synthesised answer file by name.

        Args:
            filename: Name taken from the URL; only plain ``.mp3`` names are served.

        Returns:
            The file contents, or a JSON error with status 404 for any other name.
        """
        import os  # local import: this module does not import os at the top

        # Only hand out MP3 answers. Serving arbitrary names would let a
        # client download any file next to the app, including the source
        # files that hold the API keys.
        is_plain_name = os.path.basename(filename) == filename and "\\" not in filename
        if not (is_plain_name and filename.endswith(".mp3")):
            return jsonify({"error": "not found"}), 404
        return send_file(filename)
    
    
    if __name__ == '__main__':
        import os

        # The server listens on every interface (0.0.0.0). The interactive
        # debugger enabled by debug=True lets whoever can reach it run code on
        # this machine, so debug mode is opt-in via FLASK_DEBUG=1.
        app.run("0.0.0.0", 5000, debug=os.environ.get("FLASK_DEBUG") == "1")

    index.html

    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <title>Title</title>
    </head>
    <body>
    <audio controls autoplay id="player"></audio>
    <p>
        <button onclick="start_reco()" style="background-color: yellow">录制语音指令</button>
    </p>
    <p>
        <button onclick="stop_reco_audio()" style="background-color: blue">发送语音指令</button>
    </p>

    <!-- Scripts are kept inside <body>; markup after </body> is not valid HTML. -->
    <!--<script type="application/javascript" src="/static/Recorder.js"></script>-->
    <script type="application/javascript" src="https://cdn.bootcss.com/recorderjs/0.1.0/recorder.js"></script>
    <script type="text/javascript" src="/static/jQuery3.1.1.js"></script>

    <script type="text/javascript">
        // Recorder instance; stays null until microphone access has been granted.
        var reco = null;
        var AudioContextClass = window.AudioContext || window.webkitAudioContext;
        var audio_context = new AudioContextClass();

        function on_media_error(err) {
            console.log(err);
        }

        // Prefer the promise-based mediaDevices API; the callback-style
        // navigator.getUserMedia is deprecated and only kept as a fallback.
        if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
            navigator.mediaDevices.getUserMedia({audio: true})
                .then(create_stream)
                .catch(on_media_error);
        } else {
            navigator.getUserMedia = (navigator.getUserMedia ||
                navigator.webkitGetUserMedia ||
                navigator.mozGetUserMedia ||
                navigator.msGetUserMedia);
            if (navigator.getUserMedia) {
                navigator.getUserMedia({audio: true}, create_stream, on_media_error);
            } else {
                console.log("getUserMedia is not supported by this browser");
            }
        }

        // Wrap the microphone stream in a Recorder once access is granted.
        function create_stream(user_media) {
            var stream_input = audio_context.createMediaStreamSource(user_media);
            reco = new Recorder(stream_input);
        }

        function start_reco() {
            if (!reco) {
                console.log("microphone is not ready yet");
                return;
            }
            // A context created before any user gesture may start suspended;
            // this click is a gesture, so resume it before recording.
            if (audio_context.state === "suspended") {
                audio_context.resume();
            }
            reco.record();
        }


        function stop_reco_audio() {
            if (!reco) {
                console.log("microphone is not ready yet");
                return;
            }
            reco.stop();
            send_audio();
            reco.clear();
        }


        // Upload the recording as form field "record" and play the answer.
        function send_audio() {
            reco.exportWAV(function (wav_file) {
                var formdata = new FormData();
                formdata.append("record", wav_file);
                console.log(formdata);
                $.ajax({
                    // Relative URLs: the page is served by the same app, so
                    // no host/IP needs to be hard-coded.
                    url: "/ai",
                    type: 'post',
                    processData: false,
                    contentType: false,
                    data: formdata,
                    dataType: 'json',
                    success: function (data) {
                        console.log(data);
                        document.getElementById("player").src = "/get_audio/" + encodeURIComponent(data.filename);
                    },
                    error: function (xhr) {
                        console.log("request failed:", xhr.status, xhr.responseText);
                    }
                });

            })
        }
    </script>
    </body>
    </html>
  • 相关阅读:
    RestTemplate的异常:Not enough variables available to expand
    WebApplicationContext类的作用
    select动态绑定vue.js
    spring的 @Scheduled的cron表达式
    Spring使用webjar
    ThreadLocal基本原理及运用
    mybatis choose标签的使用
    @RequestBody和@RequestParam区别
    js遍历 for-of
    MySql 模糊查询
  • 原文地址:https://www.cnblogs.com/qq849784670/p/10274138.html
Copyright © 2011-2022 走看看