zoukankan      html  css  js  c++  java
  • 百度AI开放平台,语音识别,语音合成以及短文本相似度

    百度AI开放平台:https://ai.baidu.com/

    语音合成

    from aip import AipSpeech
    import os

    # Baidu AI credentials.  Each value can be overridden through an environment
    # variable so real secrets need not live in the source; the literals are
    # only fallbacks that keep the demo runnable as published.
    APP_ID = os.environ.get("BAIDU_APP_ID", "15420964")   # your App ID
    API_KEY = os.environ.get("BAIDU_API_KEY", "6bPrLnkguN5ltxvfxRYP96Hk")  # your Api Key
    SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW")   # your Secret Key

    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    # Synthesize the sentence as Chinese ("zh") speech with the given voice options.
    result = client.synthesis("快乐的池塘里面有只小青蛙,呱呱呱儿  ", "zh", 1, {
        "vol": 5,   # volume
        "spd": 4,   # speed
        "pit": 7,   # pitch
        "per": 1,   # voice (0, 1, 3, 4)
    })

    if isinstance(result, dict):
        # A dict result is treated as a failure: show it instead of writing audio.
        print(result)
    else:
        # Anything else is written out as the audio payload.  It is deliberately
        # not printed: dumping the raw MP3 bytes to the console is just noise.
        with open("audio.mp3", "wb") as f:
            f.write(result)

    语音识别

    from aip import AipSpeech
    import os
    
    
    # Baidu AI credentials (APPID / API key / secret key).  Each value can be
    # overridden through an environment variable so real secrets need not live
    # in the source; the literals are only fallbacks that keep the demo runnable.
    APP_ID = os.environ.get("BAIDU_APP_ID", "15420964")
    API_KEY = os.environ.get("BAIDU_API_KEY", "6bPrLnkguN5ltxvfxRYP96Hk")
    SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW")

    # Speech client used for the recognition request below.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    
    
    # Read a file
    def get_file_content(filePath):
        """Return the complete contents of the file at ``filePath`` as bytes."""
        with open(filePath, mode='rb') as source:
            payload = source.read()
        return payload
    
    # Recognize a local file
    # NOTE(review): the bytes come from an .m4a file but are declared as 'pcm';
    # presumably the audio has to be converted to 16 kHz PCM first — confirm
    # against the Baidu ASR format requirements.
    res = client.asr(get_file_content('wb.m4a'), 'pcm', 16000, {
        'dev_pid': 1536,
    })

    results = res.get("result")
    if results:
        # First entry of the "result" list is the recognized text.
        print(results[0])
    else:
        # No usable "result" (e.g. an error payload): show the whole response
        # instead of crashing on None[0].
        print(res)

    学说话

    from aip import AipSpeech
    import os
    import time
    
    
    # Baidu AI credentials (APPID / API key / secret key).  Each value can be
    # overridden through an environment variable so real secrets need not live
    # in the source; the literals are only fallbacks that keep the demo runnable.
    APP_ID = os.environ.get("BAIDU_APP_ID", "15420964")
    API_KEY = os.environ.get("BAIDU_API_KEY", "6bPrLnkguN5ltxvfxRYP96Hk")
    SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW")

    # Speech client shared by audio2text() and text2audio() below.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    
    ############ Speech recognition ##########
    # Read a file
    def get_file_content(filePath):
        """Convert ``filePath`` to raw PCM with ffmpeg and return the PCM bytes.

        The converted audio (mono, 16 kHz, signed 16-bit little-endian, as
        requested by the ffmpeg flags) is written next to the input as
        ``<filePath>.pcm`` and then read back.

        Raises:
            FileNotFoundError: if the ``ffmpeg`` executable cannot be found.
            subprocess.CalledProcessError: if ffmpeg exits with an error, so a
                stale or missing ``.pcm`` file is never read by mistake.
        """
        import subprocess  # local import: only this helper needs it

        pcm_path = f"{filePath}.pcm"
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters from being split or interpreted.
        subprocess.run(
            ["ffmpeg", "-y", "-i", filePath,
             "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
             pcm_path],
            check=True,
        )
        with open(pcm_path, 'rb') as fp:
            return fp.read()
    
    # Recognize a local file
    def audio2text(filename):
        """Return the first recognition result for the audio file ``filename``.

        The file is loaded through ``get_file_content`` and submitted as 16 kHz
        'pcm' audio with ``dev_pid`` 1536.

        Raises:
            RuntimeError: if the response has no usable "result" list (for
                example an error payload); the response is included in the
                message.
        """
        res = client.asr(get_file_content(filename), 'pcm', 16000, {
            'dev_pid': 1536,
        })
        results = res.get("result")
        if not results:
            # Fail with the service response instead of an opaque None[0] TypeError.
            raise RuntimeError(f"speech recognition failed: {res}")
        return results[0]
    
    ############ Speech synthesis #########
    def text2audio(getedtext):
        """Synthesize ``getedtext`` to speech and return the written file name.

        The audio is saved in the current directory as ``<timestamp>.mp3``.

        Raises:
            RuntimeError: if synthesis yields a dict, which this code treats as
                a failure; previously the name of a file that was never written
                was returned in that case.
        """
        result = client.synthesis(getedtext, "zh", 1, {
            "vol": 5,
            "spd": 4,
            "pit": 7,
            "per": 0,
        })

        if isinstance(result, dict):
            raise RuntimeError(f"speech synthesis failed: {result}")

        filename = f"{time.time()}.mp3"
        with open(filename, "wb") as f:
            f.write(result)
        return filename
    
    # Recognize the speech in the local recording.
    getedtext=audio2text("wb.m4a")
    
    # Synthesize the recognized text; `res` is the name of the file text2audio returns.
    res=text2audio(getedtext)
    
    # Hand the file name to the shell as a command.
    # NOTE(review): presumably relies on the OS opening .mp3 files by file
    # association (e.g. Windows) — confirm on other platforms.
    os.system(res)

    语音回答问题

    from aip import AipSpeech
    import os
    import time
    
    
    # Baidu AI credentials (APPID / API key / secret key).  Each value can be
    # overridden through an environment variable so real secrets need not live
    # in the source; the literals are only fallbacks that keep the demo runnable.
    APP_ID = os.environ.get("BAIDU_APP_ID", "15420964")
    API_KEY = os.environ.get("BAIDU_API_KEY", "6bPrLnkguN5ltxvfxRYP96Hk")
    SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW")

    # Speech client shared by audio2text() and text2audio() below.
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    
    ############ Speech recognition ##########
    # Read a file
    def get_file_content(filePath):
        """Return the complete contents of the file at ``filePath`` as bytes.

        NOTE(review): unlike the ffmpeg-based variants of this helper, no PCM
        conversion happens here — confirm the caller really passes PCM audio.
        """
        with open(filePath, mode='rb') as source:
            payload = source.read()
        return payload
    
    # Recognize a local file
    def audio2text(filepath):
        """Return the first recognition result for the audio file ``filepath``.

        The file bytes are submitted as 16 kHz 'pcm' audio with ``dev_pid`` 1536.

        Raises:
            RuntimeError: if the response has no usable "result" list (for
                example an error payload); the response is included in the
                message.
        """
        res = client.asr(get_file_content(filepath), 'pcm', 16000, {
            'dev_pid': 1536,
        })
        results = res.get("result")
        if not results:
            # Fail with the service response instead of an opaque None[0] TypeError.
            raise RuntimeError(f"speech recognition failed: {res}")
        return results[0]
    
    # Output file name, fixed once at start-up; text2audio() writes to it and
    # the script plays it afterwards.
    filename = f"{time.time()}.mp3"
    ############ Speech synthesis #########
    def text2audio(getedtext):
        """Synthesize ``getedtext`` to speech and save it to the global ``filename``.

        Returns:
            The file name that was written (callers that ignore the return
            value keep working).

        Raises:
            RuntimeError: if synthesis yields a dict, which this code treats as
                a failure; previously nothing was written and the problem only
                surfaced later when the missing file was played.
        """
        result = client.synthesis(getedtext, "zh", 1, {
            "vol": 5,
            "spd": 4,
            "pit": 7,
            "per": 1,
        })

        if isinstance(result, dict):
            raise RuntimeError(f"speech synthesis failed: {result}")

        with open(filename, "wb") as f:
            f.write(result)
        return filename
    # Recognize the speech in the local recording.
    getedtext=audio2text("wb.m4a")
    
    # Synthesize the recognized text into the file named by the global `filename`.
    text2audio(getedtext)
    
    # Hand the file name to the shell as a command.
    # NOTE(review): presumably relies on the OS opening .mp3 files by file
    # association (e.g. Windows) — confirm on other platforms.
    os.system(filename)

    短文本相似度

    from aip import AipSpeech,AipNlp
    import os
    import time
    
    
    # Baidu AI credentials (APPID / API key / secret key).  Each value can be
    # overridden through an environment variable so real secrets need not live
    # in the source; the literals are only fallbacks that keep the demo runnable.
    APP_ID = os.environ.get("BAIDU_APP_ID", "15420964")
    API_KEY = os.environ.get("BAIDU_API_KEY", "6bPrLnkguN5ltxvfxRYP96Hk")
    SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW")

    # Speech client (recognition / synthesis) and NLP client (simnet similarity).
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    
    ############ Speech recognition ##########
    # Read a file
    def get_file_content(filePath):
        """Convert ``filePath`` to raw PCM with ffmpeg and return the PCM bytes.

        The converted audio (mono, 16 kHz, signed 16-bit little-endian, as
        requested by the ffmpeg flags) is written next to the input as
        ``<filePath>.pcm`` and then read back.

        Raises:
            FileNotFoundError: if the ``ffmpeg`` executable cannot be found.
            subprocess.CalledProcessError: if ffmpeg exits with an error, so a
                stale or missing ``.pcm`` file is never read by mistake.
        """
        import subprocess  # local import: only this helper needs it

        pcm_path = f"{filePath}.pcm"
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters from being split or interpreted.
        subprocess.run(
            ["ffmpeg", "-y", "-i", filePath,
             "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
             pcm_path],
            check=True,
        )
        with open(pcm_path, 'rb') as fp:
            return fp.read()
    
    # Recognize a local file
    def audio2text(filename):
        """Return the first recognition result for the audio file ``filename``.

        The file is loaded through ``get_file_content`` and submitted as 16 kHz
        'pcm' audio with ``dev_pid`` 1536.

        Raises:
            RuntimeError: if the response has no usable "result" list (for
                example an error payload); the response is included in the
                message.
        """
        res = client.asr(get_file_content(filename), 'pcm', 16000, {
            'dev_pid': 1536,
        })
        results = res.get("result")
        if not results:
            # Fail with the service response instead of an opaque None[0] TypeError.
            raise RuntimeError(f"speech recognition failed: {res}")
        return results[0]
    
    ############ Speech synthesis #########
    def text2audio(getedtext):
        """Synthesize ``getedtext`` to speech and return the written file name.

        The audio is saved in the current directory as ``<timestamp>.mp3``.

        Raises:
            RuntimeError: if synthesis yields a dict, which this code treats as
                a failure; previously the name of a file that was never written
                was returned in that case.
        """
        result = client.synthesis(getedtext, "zh", 1, {
            "vol": 5,
            "spd": 4,
            "pit": 7,
            "per": 0,
        })

        if isinstance(result, dict):
            raise RuntimeError(f"speech synthesis failed: {result}")

        filename = f"{time.time()}.mp3"
        with open(filename, "wb") as f:
            f.write(result)
        return filename
    
    ########## Tuling chatbot ################
    def to_tuling(text):
        """Send ``text`` to the Tuling chatbot API and return the text of its reply.

        Raises:
            requests.RequestException: on connection problems, a timeout, or a
                non-success HTTP status.
            RuntimeError: if the JSON body carries no "results" list.
        """
        import requests

        args = {
            "reqType": 0,
            "perception": {
                "inputText": {
                    "text": text
                }
            },
            "userInfo": {
                "apiKey": "e963f65c4c7a466a80e5aaa3510da2fa",
                "userId": "1111"
            }
        }

        url = "http://openapi.tuling123.com/openapi/api/v2"

        # Bound the wait so a stalled upstream cannot hang the caller forever.
        res = requests.post(url, json=args, timeout=10)
        print(res)
        res.raise_for_status()

        payload = res.json()
        results = payload.get("results")
        if not results:
            raise RuntimeError(f"unexpected Tuling response: {payload}")
        # The reply text sits under results[0]["values"]["text"].
        answer = (results[0].get("values") or {}).get("text")

        print("图灵答案",answer)
        return answer
    
    ########### Run ################

    # Recognize the question spoken in the local recording.
    getedtext=audio2text("wb.m4a")

    # Compare the question with a canned one.  A response without a "score"
    # (e.g. an error payload) is treated as "not similar" instead of crashing
    # on a None >= float comparison.
    score = nlp.simnet("你叫什么名字",getedtext).get("score")
    if score is not None and score>=0.68:
        # Similar enough to the canned question: use the fixed reply.
        getedtext="我才不告诉你呢,你个糟老头子坏得很"
    else:
        # Otherwise let the chatbot answer.
        getedtext=to_tuling(getedtext)

    # Synthesize the answer and hand the resulting file name to the shell.
    res=text2audio(getedtext)

    os.system(res)

    对话机器人玩具

    应用结构:

    在index.html中

    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <title>Title</title>
    </head>
    <body>
    <audio controls autoplay id="player"></audio>
    <p>
        <button onclick="start_reco()" style="background-color: yellow">录制语音指令</button>
    </p>
    <p>
        <button onclick="stop_reco_audio()" style="background-color: blue">发送语音指令</button>
    </p>
    </body>
    <!--<script type="application/javascript" src="/static/Recorder.js"></script>-->
    <script type="application/javascript" src="https://cdn.bootcss.com/recorderjs/0.1.0/recorder.js"></script>
    <script type="text/javascript" src="/static/jQuery3.1.1.js"></script>
    
    <script type="text/javascript">
        // Recorder instance; stays null until microphone access has been granted.
        var reco = null;
        // Fall back to the prefixed constructor where only that one exists.
        var audio_context = new (window.AudioContext || window.webkitAudioContext)();

        // Log why microphone capture could not be started.
        function on_media_error(err) {
            console.log(err)
        }

        if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
            // Standard promise-based API; the callback-style
            // navigator.getUserMedia is deprecated and missing in current browsers.
            navigator.mediaDevices.getUserMedia({audio: true})
                .then(create_stream)
                .catch(on_media_error);
        } else {
            // Legacy, vendor-prefixed callback API for older browsers.
            navigator.getUserMedia = (navigator.getUserMedia ||
                navigator.webkitGetUserMedia ||
                navigator.mozGetUserMedia ||
                navigator.msGetUserMedia);

            if (navigator.getUserMedia) {
                navigator.getUserMedia({audio: true}, create_stream, on_media_error);
            } else {
                on_media_error("getUserMedia is not supported by this browser");
            }
        }
    
        // Wrap the granted microphone stream in a Recorder and keep it in `reco`.
        function create_stream(user_media) {
            reco = new Recorder(audio_context.createMediaStreamSource(user_media));
        }
    
        // Start recording the voice command (bound to the "record" button).
        function start_reco() {
            if (!reco) {
                // Microphone access has not been granted (yet), so there is no recorder.
                console.log("recorder is not ready");
                return;
            }
            // Browsers may keep an AudioContext created before any user gesture
            // suspended; this click is a gesture, so resume it before recording.
            if (audio_context.state === "suspended") {
                audio_context.resume();
            }
            reco.record();
        }
    
    
        // Stop recording, upload what was captured, then reset the recorder
        // (bound to the "send" button).
        function stop_reco_audio() {
            if (!reco) {
                // Nothing was ever recorded because no recorder exists.
                console.log("recorder is not ready");
                return;
            }
            reco.stop();
            send_audio();
            reco.clear();
        }
    
    
        // Export the recording as WAV, POST it to the /ai endpoint as the form
        // field "record", and play the answer file named in the JSON reply.
        function send_audio() {
            reco.exportWAV(function (wav_file) {
                var formdata = new FormData();
                formdata.append("record", wav_file);
                console.log(formdata);
                $.ajax({
                    url: "http://192.168.43.158:9527/ai",
                    type: 'post',
                    // Let the browser build the multipart body and its boundary itself.
                    processData: false,
                    contentType: false,
                    data: formdata,
                    dataType: 'json',
                    success: function (data) {
                        document.getElementById("player").src ="http://192.168.43.158:9527/get_audio/" + data.filename
                    },
                    // Surface failed requests instead of silently doing nothing.
                    error: function (xhr, status, err) {
                        console.log("voice request failed:", status, err);
                    }
                });

            })
        }
    
    
    
    </script>
    </html>

    在app.py中

    from flask import Flask,render_template,request,jsonify,send_file
    from uuid import uuid4
    import baidu_ai
    
    app = Flask(__name__)
    
    @app.route("/")
    def index():
        """Serve the recorder page rendered from ``index.html``."""
        page = render_template("index.html")
        return page
    
    @app.route("/ai",methods=["POST"])
    def ai():
        """Answer one recorded voice question.

        Expects the recording as the multipart file field ``record`` and replies
        with JSON ``{"filename": <answer audio file>}``.  A request without that
        field gets a 400 JSON error instead of crashing on ``None.save``.
        """
        # 1. Save the uploaded recording under a unique name.
        audio = request.files.get("record")
        print('audio',audio)
        if audio is None:
            return jsonify({"error": "missing 'record' file upload"}), 400
        filename = f"{uuid4()}.wav"
        audio.save(filename)
        # 2. Have baidu_ai turn the recording into text.
        q_text = baidu_ai.audio2text(filename)
        print(q_text)
        # 3. Get an answer for the recognized question from the chatbot.
        a_text = baidu_ai.to_tuling(q_text)
        print(a_text)
        # 4. Synthesize the answer into an audio file.
        a_file = baidu_ai.text2audio(a_text)
        print(a_file)
        # 5. Tell the front end which audio file to play.

        return jsonify({"filename":a_file})
    
    
    @app.route("/get_audio/<filename>")
    def get_audio(filename):
        """Send back the audio file ``filename``.

        Only plain ``.mp3`` names are served.  Without this check any file
        reachable by name (for example the Python sources containing the API
        credentials) could be downloaded through this route.
        """
        print(filename)
        if not filename.endswith(".mp3") or "/" in filename or "\\" in filename:
            return "Not found", 404
        return send_file(filename)
    
    
    
    if __name__ == '__main__':
        # The server listens on every interface ("0.0.0.0"), so the interactive
        # debugger stays off: with debug enabled, anyone who can reach the port
        # may be able to execute code through it.
        app.run("0.0.0.0",9527,debug=False)

    在baidu_ai.py中

    from aip import AipSpeech,AipNlp
    import time,os
    
    # Baidu AI credentials (APPID / API key / secret key).  Each value can be
    # overridden through an environment variable so real secrets need not live
    # in the source; the literals are only fallbacks that keep the demo runnable.
    APP_ID = os.environ.get("BAIDU_APP_ID", "15420964")
    API_KEY = os.environ.get("BAIDU_API_KEY", "6bPrLnkguN5ltxvfxRYP96Hk")
    SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW")

    # NLP client, plus the speech client used by audio2text() and text2audio().
    nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    
    # Read a file
    def get_file_content(filePath):
        """Convert ``filePath`` to raw PCM with ffmpeg and return the PCM bytes.

        The converted audio (mono, 16 kHz, signed 16-bit little-endian, as
        requested by the ffmpeg flags) is written next to the input as
        ``<filePath>.pcm`` and then read back.

        Raises:
            FileNotFoundError: if the ``ffmpeg`` executable cannot be found.
            subprocess.CalledProcessError: if ffmpeg exits with an error, so a
                stale or missing ``.pcm`` file is never read by mistake.
        """
        import subprocess  # local import: only this helper needs it

        pcm_path = f"{filePath}.pcm"
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters from being split or interpreted.
        subprocess.run(
            ["ffmpeg", "-y", "-i", filePath,
             "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
             pcm_path],
            check=True,
        )
        with open(pcm_path, 'rb') as fp:
            return fp.read()
    
    def audio2text(filepath):
        """Return the first recognition result for the audio file ``filepath``.

        The file is loaded through ``get_file_content`` and submitted as 16 kHz
        'pcm' audio with ``dev_pid`` 1536.

        Raises:
            RuntimeError: if the response has no usable "result" list (for
                example an error payload); the response is included in the
                message.
        """
        # Recognize the local file
        res = client.asr(get_file_content(filepath), 'pcm', 16000, {
            'dev_pid': 1536,
        })
        print('----------res',res)

        results = res.get("result")
        if not results:
            # Fail with the service response instead of an opaque None[0] TypeError.
            raise RuntimeError(f"speech recognition failed: {res}")
        return results[0]
    
    def text2audio(text):
        """Synthesize ``text`` to speech and return the written file name.

        The audio is saved in the current directory as ``<timestamp>.mp3``.

        Raises:
            RuntimeError: if synthesis fails; previously the name of a file
                that was never written was returned in that case.
        """
        result = client.synthesis(text, 'zh', 1, {
            'vol': 5,
            "spd": 3,
            "pit": 7,
            "per": 4,
        })

        # On success the audio bytes are returned; on error a dict is returned.
        if isinstance(result, dict):
            raise RuntimeError(f"speech synthesis failed: {result}")

        filename = f"{time.time()}.mp3"
        with open(filename, 'wb') as f:
            f.write(result)

        return filename
    
    def to_tuling(text):
        """Send ``text`` to the Tuling chatbot API and return the text of its reply.

        Raises:
            requests.RequestException: on connection problems, a timeout, or a
                non-success HTTP status.
            RuntimeError: if the JSON body carries no "results" list.
        """
        import requests

        args = {
            "reqType": 0,
            "perception": {
                "inputText": {
                    "text": text
                }
            },
            "userInfo": {
                "apiKey": "e963f65c4c7a466a80e5aaa3510da2fa",
                "userId": "1111"
            }
        }

        url = "http://openapi.tuling123.com/openapi/api/v2"

        # Bound the wait so a stalled upstream cannot hang the web request forever.
        res = requests.post(url, json=args, timeout=10)
        print(res)
        res.raise_for_status()

        payload = res.json()
        results = payload.get("results")
        if not results:
            raise RuntimeError(f"unexpected Tuling response: {payload}")
        # The reply text sits under results[0]["values"]["text"].
        answer = (results[0].get("values") or {}).get("text")

        print("图灵答案",answer)
        return answer

     

  • 相关阅读:
    PowerShell笔记
    Windows难民安装docker的注意事项
    minix3使用轻快入门
    gentoo(贱兔) Linux作业系统的基本使用
    Artix Linux作业系统的使用~
    CentOS7搭建sftp
    Hello Wolrd
    Android开发技术周报 Issue#1
    Android开发技术周报 Issue#4
    Android开发技术周报 Issue#3
  • 原文地址:https://www.cnblogs.com/shanghongyun/p/10277476.html
Copyright © 2011-2022 走看看