声控打开浏览器的思路很简单:通过麦克风录音生成一个音频文件 --> 调用百度大脑的 API 识别音频文件中的语音并返回字符串 --> 对返回的字符串进行处理,使浏览器做出相应的反应。
通过麦克风录音并产生wav文件:
# Recording parameters shared by record_wav().
CHUNK = 1024  # number of frames read from the input stream per call
FORMAT = pyaudio.paInt16  # sample format passed to PyAudio
RATE = 8000  # sampling rate in Hz; 8000 is generally enough to recognise speech
CHANNELS = 1  # number of input channels
record_second = 5  # recording length in seconds


def record_wav(to_dir=None):
    """Record audio from the default input device and save it as a WAV file.

    The recording lasts roughly ``record_second`` seconds and uses the
    module-level CHUNK / FORMAT / RATE / CHANNELS settings.

    Args:
        to_dir: Directory in which the file is created. Defaults to the
            current directory (``'./'``) when ``None``.

    Returns:
        The path of the written file, named after the current local time
        (``YYYY-mm-dd_HH_MM_SS.wav``).
    """
    import os  # local import: only needed to join the output path

    if to_dir is None:
        to_dir = './'

    pa = pyaudio.PyAudio()
    try:
        # Queried before terminate() so it never runs on a terminated instance.
        sample_width = pa.get_sample_size(FORMAT)
        stream = pa.open(format=FORMAT,
                         channels=CHANNELS,
                         rate=RATE,
                         input=True,
                         frames_per_buffer=CHUNK)
        try:
            chunk_count = int(RATE / CHUNK * record_second)
            frames = [stream.read(CHUNK) for _ in range(chunk_count)]
        finally:
            # Release the input stream even if a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        pa.terminate()

    file_name = datetime.now().strftime("%Y-%m-%d_%H_%M_%S") + '.wav'
    # os.path.join inserts a separator when to_dir has no trailing slash.
    file_path = os.path.join(to_dir, file_name)

    # The context manager closes the file even if writing fails.
    with wave.open(file_path, 'wb') as wf:
        wf.setframerate(RATE)
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.writeframes(b''.join(frames))
    return file_path
调用百度大脑api:
def baiduys(object):
    """Recognise the speech in a WAV file through Baidu's speech REST API.

    An access token is first requested from the OAuth endpoint with the
    ``client_credentials`` grant, then the base64-encoded audio is posted as
    JSON to ``https://vop.baidu.com/server_api``.

    Args:
        object: Path of the WAV file to upload. (The name shadows the
            builtin but is kept so existing callers keep working.)

    Returns:
        The first recognition result as ``str``, or ``None`` when the service
        reports an error or returns an empty result (a message is printed in
        both cases).
    """
    from urllib.parse import urlencode  # local import: escapes the query values

    VOICE_RATE = 8000  # sample rate reported to the API
    WAVE_FILE = object
    USER_ID = 'joker'
    WAVE_TYPE = 'wav'
    baidu_server = 'https://openapi.baidu.com/oauth/2.0/token?'
    grant_type = 'client_credentials'
    # The next two values must be obtained by registering a Baidu developer account.
    client_id = ''
    client_secret = ''

    # Build the token-request URL; urlencode escapes any special characters.
    url = baidu_server + urlencode({
        'grant_type': grant_type,
        'client_id': client_id,
        'client_secret': client_secret,
    })
    with urllib.request.urlopen(url) as token_response:
        data = json.loads(token_response.read())
    token = data['access_token']

    with open(WAVE_FILE, 'rb') as f:
        fe = f.read()
    speech1 = base64.b64encode(fe).decode('utf-8')

    # 'len' is the size of the raw (un-encoded) audio in bytes.
    update = json.dumps({"format": WAVE_TYPE, "rate": VOICE_RATE, "channel": 1,
                         'token': token, 'cuid': USER_ID, 'speech': speech1,
                         'len': len(fe)})
    update1 = update.encode('utf-8')
    headers = {'Content-Type': 'application/json'}
    url = 'https://vop.baidu.com/server_api'
    req = urllib.request.Request(url, update1, headers)
    with urllib.request.urlopen(req) as r:
        ans = json.loads(r.read())  # the reply is JSON as well

    if ans['err_msg'] == 'success.':
        # Compare as str: the old bytes-vs-str comparison was always unequal,
        # so an empty result was never detected.
        result = ans['result'][0]
        if result != '':
            return result
        print(u'不存在文件0')
    else:
        print(u'错误')
    return None
处理字符串并作出反应:
def text_open_browser(text):
    """Open a browser tab for the site named in the recognised text.

    The text is searched for the keywords 谷歌 / google (opens Google) and
    百度 / baidu (opens Baidu). The Latin keywords are matched
    case-insensitively. When no keyword is found, or the text is empty or
    ``None``, nothing is opened and ``'no'`` is printed.

    Args:
        text: Recognised speech as a string, or ``None``.
    """
    url = ""
    if text:
        lowered = text.lower()
        if u"谷歌" in text or 'google' in lowered:
            url = 'https://www.google.com'
        elif u'百度' in text or 'baidu' in lowered:
            url = 'https://www.baidu.com'

    # Decide on the URL, not the text: a text without any keyword must not
    # open a tab with an empty address.
    if url:
        webbrowser.open_new_tab(url)
    else:
        print('no')
这一步只是简单的关键词检索:在识别出的文本中查找关键词,并用 webbrowser 模块打开与该关键词对应的网址。
完整代码:
import base64
from datetime import datetime
import json
import os
import re
import urllib.parse
import urllib.request
import wave
import webbrowser

import pyaudio

# Recording parameters shared by record_wav().
CHUNK = 1024  # number of frames read from the input stream per call
FORMAT = pyaudio.paInt16  # sample format passed to PyAudio
RATE = 8000  # sampling rate in Hz
CHANNELS = 1  # number of input channels
record_second = 5  # recording length in seconds


def record_wav(to_dir=None):
    """Record audio from the default input device and save it as a WAV file.

    The recording lasts roughly ``record_second`` seconds and uses the
    module-level CHUNK / FORMAT / RATE / CHANNELS settings.

    Args:
        to_dir: Directory in which the file is created. Defaults to the
            current directory (``'./'``) when ``None``.

    Returns:
        The path of the written file, named after the current local time
        (``YYYY-mm-dd_HH_MM_SS.wav``).
    """
    if to_dir is None:
        to_dir = './'

    pa = pyaudio.PyAudio()
    try:
        # Queried before terminate() so it never runs on a terminated instance.
        sample_width = pa.get_sample_size(FORMAT)
        stream = pa.open(format=FORMAT,
                         channels=CHANNELS,
                         rate=RATE,
                         input=True,
                         frames_per_buffer=CHUNK)
        try:
            chunk_count = int(RATE / CHUNK * record_second)
            frames = [stream.read(CHUNK) for _ in range(chunk_count)]
        finally:
            # Release the input stream even if a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        pa.terminate()

    file_name = datetime.now().strftime("%Y-%m-%d_%H_%M_%S") + '.wav'
    # os.path.join inserts a separator when to_dir has no trailing slash.
    file_path = os.path.join(to_dir, file_name)

    # The context manager closes the file even if writing fails.
    with wave.open(file_path, 'wb') as wf:
        wf.setframerate(RATE)
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.writeframes(b''.join(frames))
    return file_path


def text_open_browser(text):
    """Open a browser tab for the site named in the recognised text.

    The text is searched for the keywords 谷歌 / google (opens Google) and
    百度 / baidu (opens Baidu). The Latin keywords are matched
    case-insensitively. When no keyword is found, or the text is empty or
    ``None``, nothing is opened and ``'no'`` is printed.

    Args:
        text: Recognised speech as a string, or ``None``.
    """
    url = ""
    if text:
        lowered = text.lower()
        if u"谷歌" in text or 'google' in lowered:
            url = 'https://www.google.com'
        elif u'百度' in text or 'baidu' in lowered:
            url = 'https://www.baidu.com'

    # Decide on the URL, not the text: a text without any keyword must not
    # open a tab with an empty address.
    if url:
        webbrowser.open_new_tab(url)
    else:
        print('no')


def baiduys(object):
    """Recognise the speech in a WAV file through Baidu's speech REST API.

    An access token is first requested from the OAuth endpoint with the
    ``client_credentials`` grant, then the base64-encoded audio is posted as
    JSON to ``https://vop.baidu.com/server_api``.

    Args:
        object: Path of the WAV file to upload. (The name shadows the
            builtin but is kept so existing callers keep working.)

    Returns:
        The first recognition result as ``str``, or ``None`` when the service
        reports an error or returns an empty result (a message is printed in
        both cases).
    """
    VOICE_RATE = 8000  # sample rate reported to the API; same value as RATE
    WAVE_FILE = object
    USER_ID = 'joker'
    WAVE_TYPE = 'wav'
    baidu_server = 'https://openapi.baidu.com/oauth/2.0/token?'
    grant_type = 'client_credentials'
    # The next two values must be obtained by registering a Baidu developer account.
    client_id = ''
    client_secret = ''

    # Build the token-request URL; urlencode escapes any special characters.
    url = baidu_server + urllib.parse.urlencode({
        'grant_type': grant_type,
        'client_id': client_id,
        'client_secret': client_secret,
    })
    with urllib.request.urlopen(url) as token_response:
        data = json.loads(token_response.read())
    token = data['access_token']

    with open(WAVE_FILE, 'rb') as f:
        fe = f.read()
    speech1 = base64.b64encode(fe).decode('utf-8')

    # 'len' is the size of the raw (un-encoded) audio in bytes.
    update = json.dumps({"format": WAVE_TYPE, "rate": VOICE_RATE, "channel": 1,
                         'token': token, 'cuid': USER_ID, 'speech': speech1,
                         'len': len(fe)})
    update1 = update.encode('utf-8')
    headers = {'Content-Type': 'application/json'}
    url = 'https://vop.baidu.com/server_api'
    req = urllib.request.Request(url, update1, headers)
    with urllib.request.urlopen(req) as r:
        ans = json.loads(r.read())  # the reply is JSON as well

    if ans['err_msg'] == 'success.':
        # Compare as str: the old bytes-vs-str comparison was always unequal,
        # so an empty result was never detected.
        result = ans['result'][0]
        if result != '':
            return result
        print(u'不存在文件0')
    else:
        print(u'错误')
    return None


if __name__ == '__main__':
    to_dir = './'
    file_path = record_wav(to_dir)
    # Resolve the recording against the current working directory rather than
    # a hard-coded, machine-specific Windows path. The old 'C:\U...' literal
    # was also a syntax error in Python 3, because \U starts a Unicode escape.
    file_path1 = os.path.abspath(file_path)
    text = baiduys(file_path1)
    print(text)
    text_open_browser(text)