1. 语音合成。百度 AipSpeech 的 synthesis() 方法,参数依次为:1. 待合成语音的文本;2. 语言;3. 客户端类型,传 1;4. option{语音合成参数}:pit 音调、spd 语速、vol 音量、per 发音人(cosplay,即合成语音的角色)。 2. 语音识别。百度 AipSpeech 的 asr() 方法,参数依次为:1. 音频文件流;2. 音频文件格式(pcm);3. 音频采样率(16000,支持 8k/16k);4. option{识别语种}:dev_pid 为 1537 时表示包含简单英文的普通话识别。录音文件基本上不会直接是 pcm 格式,需要先对音频文件进行转换。ffmpeg:FFmpeg 是开源的音视频处理工具,涉及 audio 处理的基本上都用它。通过 os.system(FFmpeg_cmdstr) 调用 ffmpeg,得到新的 pcm 文件。
1. 百度注册账号.
案例1 .文本转语音
import os
from uuid import uuid4

from aip import AipNlp
from aip import AipSpeech

import setting

# NOTE(review): credentials are hard-coded for this demo; load them from
# configuration or the environment before using this outside a tutorial.
APP_ID = "11562884"
API_KEY = "9iOLKP9VCo4nsEf3N8dcOUmT"
SECRET_KEY = "aW0kwOHFbHrQely6bcmGTzU49t2jOYdL"

baidu_client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
baidu_nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)


# 1. Text to speech.
def text2audio(text):
    """Synthesize *text* to a new MP3 file under ``setting.AUDIO_FILE_PATH``.

    Args:
        text: The text to turn into speech.

    Returns:
        The generated file name (``<uuid4>.mp3``) once the audio has been
        written, or the dict handed back by ``synthesis`` unchanged when
        the call yields a dict instead of audio data.
    """
    file_name = f"{uuid4()}.mp3"
    print(f"filename:{file_name}")
    file_path = os.path.join(setting.AUDIO_FILE_PATH, file_name)
    print(f"file_path:{file_path}")

    res = baidu_client.synthesis(text, "zh", 1, {
        "vol": 5,
        "pit": 7,
        "spd": 4,
        "per": 4,
    })

    # A dict result is not audio data (presumably the SDK's error payload);
    # pass it back to the caller instead of writing it to disk.
    if isinstance(res, dict):
        return res

    with open(file_path, "wb") as f:
        f.write(res)
    return file_name


print(text2audio("强哥威武"))
打印结果:
filename:86375f56-f42d-447a-9bcc-829d370ff805.mp3
file_path:audio\86375f56-f42d-447a-9bcc-829d370ff805.mp3
86375f56-f42d-447a-9bcc-829d370ff805.mp3
案例2.语音转文本.
def audio2text(file_name):
    """Convert an audio file to 16 kHz mono PCM and run speech recognition on it.

    Args:
        file_name: Name of the recording inside ``setting.AUDIO_FILE_PATH``.

    Returns:
        The first entry of the response's ``"result"`` list, or the whole
        response dict when its ``"err_no"`` field is truthy.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ffmpeg executable cannot be found.
    """
    # Local import: this snippet does not own the file's import block.
    import subprocess

    source_path = os.path.join(setting.AUDIO_FILE_PATH, file_name)
    pcm_path = f"{source_path}.pcm"

    # Pass the arguments as a list so the file name is never parsed by a
    # shell, and fail loudly instead of uploading stale or missing audio.
    subprocess.run(
        [
            "ffmpeg", "-y", "-i", source_path,
            "-acodec", "pcm_s16le", "-f", "s16le",
            "-ac", "1", "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )

    # Read the converted PCM output, not the original recording.
    with open(pcm_path, "rb") as f:
        audio_context = f.read()

    res = baidu_client.asr(audio_context, "pcm", 16000, {"dev_pid": 1537})
    if res.get("err_no"):
        return res
    return res.get("result")[0]


print(audio2text("qiang.wma"))
案例3. 自然语言处理(NLP):短文本相似度匹配
# 3. NLP: pick a canned answer by short-text similarity.
def my_nlp(text):
    """Return a canned answer for *text* based on ``simnet`` similarity.

    *text* is compared against three reference questions in order; the
    first one scoring at least 0.72 decides the answer.

    Args:
        text: The user's question.

    Returns:
        The matching answer string, or ``None`` when no reference question
        reaches the threshold.
    """
    threshold = 0.72

    def similarity(question):
        # A response without a "score" key makes .get() return None; count
        # that as "no match" rather than comparing None with a float.
        return baidu_nlp.simnet(question, text).get("score") or 0

    # Score once and reuse the value for both the check and the debug print.
    age_score = similarity("你今年几岁了 ")
    if age_score >= threshold:
        print(age_score)
        return "我今年73岁了,不然84岁也行"
    if similarity("你叫什么名字") >= threshold:
        return "我的名字叫做小嘿嘿"
    if similarity("你在哪里学习") >= threshold:
        return "我在学习python"
    return None


print(my_nlp('你多大了'))  # Result: 我今年73岁了,不然84岁也行
print(my_nlp("你的名字是"))  # Result: 我的名字叫做小嘿嘿
代码总结 :
import os
import subprocess
from uuid import uuid4

from aip import AipNlp
from aip import AipSpeech

import setting

# NOTE(review): credentials are hard-coded for this demo; load them from
# configuration or the environment before using this outside a tutorial.
APP_ID = "11562884"
API_KEY = "9iOLKP9VCo4nsEf3N8dcOUmT"
SECRET_KEY = "aW0kwOHFbHrQely6bcmGTzU49t2jOYdL"

baidu_client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
baidu_nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)


# 1. Text to speech.
def text2audio(text):
    """Synthesize *text* to a new MP3 file under ``setting.AUDIO_FILE_PATH``.

    Args:
        text: The text to turn into speech.

    Returns:
        The generated file name (``<uuid4>.mp3``) once the audio has been
        written, or the dict handed back by ``synthesis`` unchanged when
        the call yields a dict instead of audio data.
    """
    file_name = f"{uuid4()}.mp3"
    print(f"filename:{file_name}")
    file_path = os.path.join(setting.AUDIO_FILE_PATH, file_name)
    print(f"file_path:{file_path}")

    res = baidu_client.synthesis(text, "zh", 1, {
        "vol": 5,
        "pit": 7,
        "spd": 4,
        "per": 4,
    })

    # A dict result is not audio data (presumably the SDK's error payload);
    # pass it back to the caller instead of writing it to disk.
    if isinstance(res, dict):
        return res

    with open(file_path, "wb") as f:
        f.write(res)
    return file_name


# Example: print(text2audio("强哥威武"))


# 2. Speech to text.
def audio2text(file_name):
    """Convert an audio file to 16 kHz mono PCM and run speech recognition on it.

    Args:
        file_name: Name of the recording inside ``setting.AUDIO_FILE_PATH``.

    Returns:
        The first entry of the response's ``"result"`` list, or the whole
        response dict when its ``"err_no"`` field is truthy.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ffmpeg executable cannot be found.
    """
    source_path = os.path.join(setting.AUDIO_FILE_PATH, file_name)
    pcm_path = f"{source_path}.pcm"

    # Pass the arguments as a list so the file name is never parsed by a
    # shell, and fail loudly instead of uploading stale or missing audio.
    subprocess.run(
        [
            "ffmpeg", "-y", "-i", source_path,
            "-acodec", "pcm_s16le", "-f", "s16le",
            "-ac", "1", "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )

    # Read the converted PCM output, not the original recording.
    with open(pcm_path, "rb") as f:
        audio_context = f.read()

    res = baidu_client.asr(audio_context, "pcm", 16000, {"dev_pid": 1537})
    if res.get("err_no"):
        return res
    return res.get("result")[0]


# Example: print(audio2text("qiang.wma"))


# 3. NLP: pick a canned answer by short-text similarity.
def my_nlp(text):
    """Return a canned answer for *text* based on ``simnet`` similarity.

    *text* is compared against three reference questions in order; the
    first one scoring at least 0.72 decides the answer.

    Args:
        text: The user's question.

    Returns:
        The matching answer string, or ``None`` when no reference question
        reaches the threshold.
    """
    threshold = 0.72

    def similarity(question):
        # A response without a "score" key makes .get() return None; count
        # that as "no match" rather than comparing None with a float.
        return baidu_nlp.simnet(question, text).get("score") or 0

    # Score once and reuse the value for both the check and the debug print.
    age_score = similarity("你今年几岁了 ")
    if age_score >= threshold:
        print(age_score)
        return "我今年73岁了,不然84岁也行"
    if similarity("你叫什么名字") >= threshold:
        return "我的名字叫做小嘿嘿"
    if similarity("你在哪里学习") >= threshold:
        return "我在学习python"
    return None


print(my_nlp('你多大了'))  # Result: 我今年73岁了,不然84岁也行
print(my_nlp("你的名字是"))  # Result: 我的名字叫做小嘿嘿
send_file 与audio标签.
py文件
from flask import Flask, request, send_file, send_from_directory
import setting
import os

app = Flask(__name__)


@app.route("/getfile/<file_name>")
def get_file(file_name):
    """Serve one file from ``setting.AUDIO_FILE_PATH`` by its name.

    Args:
        file_name: File name taken from the URL path segment.

    Returns:
        The Flask response streaming the requested file.
    """
    # file_name comes straight from the URL, so let Flask join it to the
    # directory: send_from_directory refuses names that escape the folder
    # and answers 404 for a missing file.
    return send_from_directory(setting.AUDIO_FILE_PATH, file_name)


if __name__ == "__main__":
    # With no arguments the development server listens on Flask's default
    # address, 127.0.0.1:5000.
    app.run()
html文件
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
    <!-- Flask's app.run() serves on port 5000 by default, so the file URL
         must point there. "controls" gives the user a play button, since
         browsers commonly block autoplay before any user interaction. -->
    <audio controls autoplay="autoplay" id="play_mp3"
           src="http://127.0.0.1:5000/getfile/7b86f60d-6f43-4342-9948-38657de4843e.mp3"></audio>
</body>
</html>