zoukankan      html  css  js  c++  java
  • 语音识别,文本转语音,语音转文本

    1. 语音合成.
    	百度AipSpeech:
    	synthesis():
    		1. 待合成语音的文本.
    		2. 语言.
    		3. 客户端类型,1 
    		4.option{语音合成参数}: pit 声调  spd 语速 vol 音量 per cosplay 合成语音角色.
    		
    2. 语音识别
    	百度 AipSpeech
    		 asr():
    			1. 音频文件流 
    			2. 音频文件格式(pcm)
    			3. 音频采样率(16000, 8k/16k)
    			4. option{识别语种}: dev_pid :1537  包含简单英文的普通话识别. 
    		
    		录音文件基本上没有直接pcm格式,需要对音频文件进行转换.
    		ffmpeg: FFmpeg 是一个开源的音视频处理工具(并不是公司), 涉及 audio 处理的基本上都用它.
    		通过os.system(FFmpeg_cmdstr)得到新的pcm文件.
    		
    

    1.  百度注册账号.

    案例1 .文本转语音

    from aip import AipSpeech
    from aip import AipNlp
    from uuid import uuid4
    import setting
    import os
    
    # Baidu AI credentials. Environment variables take precedence so real keys
    # do not have to live in source control; the literals are kept only as
    # backward-compatible fallbacks.
    # NOTE(review): the fallback keys are committed in plain text - consider
    # rotating them and removing the defaults.
    APP_ID = os.environ.get("BAIDU_APP_ID", "11562884")
    API_KEY = os.environ.get("BAIDU_API_KEY", "9iOLKP9VCo4nsEf3N8dcOUmT")
    SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "aW0kwOHFbHrQely6bcmGTzU49t2jOYdL")

    # Speech client (synthesis / recognition) and NLP client, both built from
    # the same credentials.
    baidu_client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    baidu_nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    
    # 1. Text to speech.
    def text2audio(text):
        """Synthesize ``text`` with Baidu TTS and save the audio as an MP3 file.

        The file is written into ``setting.AUDIO_FILE_PATH`` under a freshly
        generated UUID-based name. The file name and full path are printed.

        Args:
            text: The text to synthesize.

        Returns:
            The generated file name (not the full path) when audio bytes were
            received. If ``synthesis`` returns a dict instead, that dict is
            returned unchanged and no file is written.
        """
        file_name = f"{uuid4()}.mp3"
        print(f"filename:{file_name}")
        file_path = os.path.join(setting.AUDIO_FILE_PATH, file_name)
        print(f"file_path:{file_path}")

        # Positional args: text, language, client type. Options: vol = volume,
        # pit = pitch, spd = speed, per = voice persona.
        res = baidu_client.synthesis(text, "zh", 1, {
            "vol": 5,
            "pit": 7,
            "spd": 4,
            "per": 4,
        })

        # A dict result means no audio bytes came back (presumably an error
        # payload - confirm against the Baidu SDK docs); hand it to the caller.
        if isinstance(res, dict):
            return res

        with open(file_path, "wb") as f:
            f.write(res)

        return file_name
    
    # Demo: synthesize a short phrase and print what text2audio returns
    # (the saved file name, or the dict handed back by the API).
    print(text2audio("强哥威武"))

    打印结果:

    filename:86375f56-f42d-447a-9bcc-829d370ff805.mp3
    file_path:audio86375f56-f42d-447a-9bcc-829d370ff805.mp3
    86375f56-f42d-447a-9bcc-829d370ff805.mp3

    案例2.语音转文本.

    def audio2text(file_name):
        """Transcribe an audio file stored under ``setting.AUDIO_FILE_PATH``.

        The file is first converted with ffmpeg to raw 16 kHz mono 16-bit PCM
        (written next to the source as ``<file>.pcm``), then the PCM bytes are
        sent to Baidu speech recognition.

        Args:
            file_name: Name of the audio file inside ``setting.AUDIO_FILE_PATH``.

        Returns:
            The first recognition result on success, or the full response dict
            when the response carries a truthy ``err_no``.

        Raises:
            FileNotFoundError: If the ffmpeg executable cannot be found.
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        """
        # Local import keeps this function self-contained.
        import subprocess

        source_path = os.path.join(setting.AUDIO_FILE_PATH, file_name)
        pcm_path = f"{source_path}.pcm"

        # Pass an argument list (no shell) so spaces or shell metacharacters in
        # file_name cannot break or hijack the command; check=True stops us from
        # uploading a stale or missing .pcm after a failed conversion.
        subprocess.run(
            [
                "ffmpeg", "-y", "-i", source_path,
                "-acodec", "pcm_s16le", "-f", "s16le",
                "-ac", "1", "-ar", "16000",
                pcm_path,
            ],
            check=True,
        )

        # Read the converted PCM output, not the original container file.
        with open(pcm_path, "rb") as f:
            audio_context = f.read()

        # dev_pid 1537: Mandarin recognition that tolerates simple English.
        res = baidu_client.asr(audio_context, "pcm", 16000, {"dev_pid": 1537})

        if res.get("err_no"):
            return res

        return res.get("result")[0]
    
    # Demo: transcribe the sample recording "qiang.wma", which audio2text looks
    # up under setting.AUDIO_FILE_PATH.
    print(audio2text("qiang.wma"))

    案例3. NLP 短文本相似度匹配 (simnet)

    # 3. Short-text similarity matching (Baidu NLP simnet).
    def my_nlp(text):
        """Return a canned answer for the known question most similar to ``text``.

        ``text`` is compared against each known question in order using
        ``baidu_nlp.simnet``; the first one scoring at least 0.72 wins.

        Args:
            text: The user's question.

        Returns:
            The canned answer string, or ``None`` when no known question is
            similar enough.
        """
        threshold = 0.72

        def _similarity(question):
            # One API call per question. A response without "score" (e.g. an
            # error payload) counts as zero similarity instead of raising
            # TypeError on the comparison.
            return baidu_nlp.simnet(question, text).get("score") or 0

        age_score = _similarity("你今年几岁了 ")
        if age_score >= threshold:
            # Reuse the score already fetched rather than calling the API again.
            print(age_score)
            return "我今年73岁了,不然84岁也行"

        if _similarity("你叫什么名字") >= threshold:
            return "我的名字叫做小嘿嘿"

        if _similarity("你在哪里学习") >= threshold:
            return "我在学习python"

        return None
    
    
    print(my_nlp('你多大了'))  # result: 我今年73岁了,不然84岁也行
    
    print(my_nlp("你的名字是")) # result: 我的名字叫做小嘿嘿

      

    代码总结 :

    from aip import AipSpeech
    from aip import AipNlp
    from uuid import uuid4
    import setting
    import os
    
    # Baidu AI credentials. Environment variables take precedence so real keys
    # do not have to live in source control; the literals are kept only as
    # backward-compatible fallbacks.
    # NOTE(review): the fallback keys are committed in plain text - consider
    # rotating them and removing the defaults.
    APP_ID = os.environ.get("BAIDU_APP_ID", "11562884")
    API_KEY = os.environ.get("BAIDU_API_KEY", "9iOLKP9VCo4nsEf3N8dcOUmT")
    SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "aW0kwOHFbHrQely6bcmGTzU49t2jOYdL")

    # Speech client (synthesis / recognition) and NLP client, both built from
    # the same credentials.
    baidu_client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    baidu_nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    
    # 1. Text to speech.
    def text2audio(text):
        """Synthesize ``text`` with Baidu TTS and save the audio as an MP3 file.

        The file is written into ``setting.AUDIO_FILE_PATH`` under a freshly
        generated UUID-based name. The file name and full path are printed.

        Args:
            text: The text to synthesize.

        Returns:
            The generated file name (not the full path) when audio bytes were
            received. If ``synthesis`` returns a dict instead, that dict is
            returned unchanged and no file is written.
        """
        file_name = f"{uuid4()}.mp3"
        print(f"filename:{file_name}")
        file_path = os.path.join(setting.AUDIO_FILE_PATH, file_name)
        print(f"file_path:{file_path}")

        # Positional args: text, language, client type. Options: vol = volume,
        # pit = pitch, spd = speed, per = voice persona.
        res = baidu_client.synthesis(text, "zh", 1, {
            "vol": 5,
            "pit": 7,
            "spd": 4,
            "per": 4,
        })

        # A dict result means no audio bytes came back (presumably an error
        # payload - confirm against the Baidu SDK docs); hand it to the caller.
        if isinstance(res, dict):
            return res

        with open(file_path, "wb") as f:
            f.write(res)

        return file_name
    #
    # print(text2audio("强哥威武"))
    
    
    
    # 2. Speech to text.
    def audio2text(file_name):
        """Transcribe an audio file stored under ``setting.AUDIO_FILE_PATH``.

        The file is first converted with ffmpeg to raw 16 kHz mono 16-bit PCM
        (written next to the source as ``<file>.pcm``), then the PCM bytes are
        sent to Baidu speech recognition.

        Args:
            file_name: Name of the audio file inside ``setting.AUDIO_FILE_PATH``.

        Returns:
            The first recognition result on success, or the full response dict
            when the response carries a truthy ``err_no``.

        Raises:
            FileNotFoundError: If the ffmpeg executable cannot be found.
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        """
        # Local import keeps this function self-contained.
        import subprocess

        source_path = os.path.join(setting.AUDIO_FILE_PATH, file_name)
        pcm_path = f"{source_path}.pcm"

        # Pass an argument list (no shell) so spaces or shell metacharacters in
        # file_name cannot break or hijack the command; check=True stops us from
        # uploading a stale or missing .pcm after a failed conversion.
        subprocess.run(
            [
                "ffmpeg", "-y", "-i", source_path,
                "-acodec", "pcm_s16le", "-f", "s16le",
                "-ac", "1", "-ar", "16000",
                pcm_path,
            ],
            check=True,
        )

        # Read the converted PCM output, not the original container file.
        with open(pcm_path, "rb") as f:
            audio_context = f.read()

        # dev_pid 1537: Mandarin recognition that tolerates simple English.
        res = baidu_client.asr(audio_context, "pcm", 16000, {"dev_pid": 1537})

        if res.get("err_no"):
            return res

        return res.get("result")[0]
    #
    # Demo: transcribe the sample recording "qiang.wma", which audio2text looks
    # up under setting.AUDIO_FILE_PATH.
    print(audio2text("qiang.wma"))
    
    
    # 3. Short-text similarity matching (Baidu NLP simnet).
    def my_nlp(text):
        """Return a canned answer for the known question most similar to ``text``.

        ``text`` is compared against each known question in order using
        ``baidu_nlp.simnet``; the first one scoring at least 0.72 wins.

        Args:
            text: The user's question.

        Returns:
            The canned answer string, or ``None`` when no known question is
            similar enough.
        """
        threshold = 0.72

        def _similarity(question):
            # One API call per question. A response without "score" (e.g. an
            # error payload) counts as zero similarity instead of raising
            # TypeError on the comparison.
            return baidu_nlp.simnet(question, text).get("score") or 0

        age_score = _similarity("你今年几岁了 ")
        if age_score >= threshold:
            # Reuse the score already fetched rather than calling the API again.
            print(age_score)
            return "我今年73岁了,不然84岁也行"

        if _similarity("你叫什么名字") >= threshold:
            return "我的名字叫做小嘿嘿"

        if _similarity("你在哪里学习") >= threshold:
            return "我在学习python"

        return None
    
    
    print(my_nlp('你多大了'))  # result: 我今年73岁了,不然84岁也行
    
    print(my_nlp("你的名字是")) # result: 我的名字叫做小嘿嘿
    View Code

    send_file 与audio标签.

    py文件

    from  flask  import Flask,request,send_file
    import setting
    import os
    
    # Flask application object; the /getfile route below is registered on it.
    app =Flask(__name__)
    
    
    
    @app.route("/getfile/<file_name>")
    def get_file(file_name):
        """Serve one file from ``setting.AUDIO_FILE_PATH`` to the client.

        Args:
            file_name: File name taken from the URL path.

        Returns:
            A Flask response streaming the file. A name that is missing or
            that resolves outside the audio directory produces a 404 response.
        """
        # Local import: the module-level flask import does not include this helper.
        from flask import send_from_directory

        # send_from_directory joins the client-supplied name safely, so it
        # cannot escape the audio directory (e.g. via ".." or backslashes).
        return send_from_directory(setting.AUDIO_FILE_PATH, file_name)
    
    
    # Start the development server only when this file is run as a script.
    # NOTE(review): app.run() is called without host/port, so Flask's defaults
    # apply, while the HTML sample requests http://127.0.0.1:1111 - confirm
    # which port is intended.
    if __name__ =="__main__":
        app.run()

    html文件

    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <title>Title</title>
    </head>
    <body>
    <audio autoplay="autoplay" id="play_mp3" src="http://127.0.0.1:1111/getfile/7b86f60d-6f43-4342-9948-38657de4843e.mp3"></audio>
    </body>
    </html>

  • 相关阅读:
    Equivalent Sets HDU
    Chemical table CFR500 div2D(并查集)
    How do I create an installation log?
    Error 1937.An error occurred during the installation of assembly...
    InstallShield 版本转换
    Convert CString to TCHAR
    InstallShield : 如何查找编译后的 Merge Module存放路径
    Msi.h causes compilation error in vs2010
    区间调度(贪心)
    硬币问题(贪心)
  • 原文地址:https://www.cnblogs.com/mengbin0546/p/10315538.html
Copyright © 2011-2022 走看看