zoukankan      html  css  js  c++  java
  • 用pyaudio做一个声控打开浏览器的小程序

    声控打开浏览器思路很简单:通过麦克风录音产生一个音频文件-->通过调用百度大脑的api识别音频文件中的语音并返回字符串-->通过对字符串的处理使浏览器做出反应

    通过麦克风录音并产生wav文件:


    # Key recording parameters, set up front and shared by record_wav().
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    RATE = 8000  # a sample rate of 8000 is generally enough to recognise speech
    CHANNELS = 1
    record_second = 5  # multiplied with RATE / CHUNK to get the number of chunks read
    def record_wav(to_dir=None):
        """Record audio through PyAudio and save it as a WAV file.

        Reads ``int(RATE / CHUNK * record_second)`` chunks of ``CHUNK`` frames
        from an input stream configured with the module-level ``FORMAT``,
        ``CHANNELS`` and ``RATE`` settings.

        Args:
            to_dir: Directory in which the file is created. ``None`` means
                the current directory (``'./'``).

        Returns:
            The path of the written file; the file name is the current local
            time formatted as ``YYYY-mm-dd_HH_MM_SS.wav``.
        """
        if to_dir is None:
            to_dir = './'

        pa = pyaudio.PyAudio()  # create the PyAudio object
        try:
            # Query the sample width before the PyAudio instance is terminated.
            sample_width = pa.get_sample_size(FORMAT)

            # Initialise the input stream.
            stream = pa.open(format=FORMAT,
                             channels=CHANNELS,
                             rate=RATE,
                             input=True,
                             frames_per_buffer=CHUNK)
            try:
                # Collect the raw audio chunks in a list.
                chunk_count = int(RATE / CHUNK * record_second)
                frames = [stream.read(CHUNK) for _ in range(chunk_count)]
            finally:
                # Release the stream even if reading fails part-way.
                stream.stop_stream()
                stream.close()
        finally:
            pa.terminate()

        # Build the destination path; os.path.join also copes with a to_dir
        # that has no trailing separator.
        file_name = datetime.now().strftime("%Y-%m-%d_%H_%M_%S") + '.wav'
        file_path = os.path.join(to_dir, file_name)

        # Write the recording; the context manager closes the file on error too.
        with wave.open(file_path, 'wb') as wf:
            wf.setframerate(RATE)
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(sample_width)
            wf.writeframes(b''.join(frames))

        return file_path

    调用百度大脑api

    def baiduys(object):
        """Send a WAV recording to Baidu's speech API and return the text.

        Args:
            object: Path of the WAV file to recognise. (The name shadows the
                builtin; it is kept so existing callers keep working.)

        Returns:
            The first recognition candidate as ``str``, or ``None`` when the
            service does not report success or the candidate is empty (a
            message is printed in either case).
        """
        import urllib.parse  # local import: only needed to build the token URL

        VOICE_RATE = 8000
        WAVE_FILE = object
        USER_ID = 'joker'
        WAVE_TYPE = 'wav'

        baidu_server = 'https://openapi.baidu.com/oauth/2.0/token?'
        grant_type = 'client_credentials'

        # The next two values must be obtained by registering your own Baidu
        # developer account.
        client_id = ''
        client_secret = ''

        # Build the URL used to request an access token; urlencode escapes
        # any special characters in the credentials.
        url = baidu_server + urllib.parse.urlencode({
            'grant_type': grant_type,
            'client_id': client_id,
            'client_secret': client_secret,
        })
        with urllib.request.urlopen(url) as response:
            data = json.loads(response.read())  # parse the JSON into a dict
        token = data['access_token']

        # Read the WAV file; it is closed before any network traffic starts.
        with open(WAVE_FILE, 'rb') as f:
            audio = f.read()

        # Serialise the request dict to JSON for transmission.
        update = json.dumps({
            "format": WAVE_TYPE,
            "rate": VOICE_RATE,
            "channel": 1,
            'token': token,
            'cuid': USER_ID,
            'speech': base64.b64encode(audio).decode('utf-8'),
            'len': len(audio),
        }).encode('utf-8')
        headers = {'Content-Type': 'application/json'}
        req = urllib.request.Request('https://vop.baidu.com/server_api', update, headers)

        # Upload the audio to obtain the recognition result (still JSON).
        with urllib.request.urlopen(req) as response:
            ans = json.loads(response.read())

        if ans.get('err_msg') != 'success.':
            print(u'错误')
            return None

        # Compare as str: the original compared UTF-8 bytes with '' which is
        # always unequal, so the empty-result branch could never run.
        candidates = ans.get('result') or ['']
        result = candidates[0]
        if result != '':
            return result
        print(u'不存在文件0')
        return None

    处理字符串并作出反应:

    def text_open_browser(text):
        """Open the site named in *text* in a new browser tab.

        Recognised keywords are "谷歌"/"google" (Google) and "百度"/"baidu"
        (Baidu); Google takes precedence when both appear.

        Args:
            text: Recognised speech as a string; may be empty or ``None``.

        Returns:
            None. Prints ``'no'`` when no keyword is found.
        """
        url = ""
        if text:
            if u"谷歌" in text or 'google' in text:
                url = 'https://www.google.com'
            elif u'百度' in text or 'baidu' in text:
                url = 'https://www.baidu.com'

        # Branch on the resolved URL rather than on the raw text, so that
        # unmatched or missing text never opens a tab with an empty address.
        if url:
            webbrowser.open_new_tab(url)
        else:
            print('no')

    这就是一个简单的关键词检索:用webbrowser模块根据关键词所匹配的网址打开浏览器。

    完整代码:

    import base64
    from datetime import datetime
    import json
    import os
    import urllib.request
    
    import wave
    import webbrowser
    
    import pyaudio
    import re
    
    # Recording parameters shared by record_wav().
    CHUNK = 1024  # frames per stream.read() call; also passed as frames_per_buffer
    FORMAT = pyaudio.paInt16  # sample format handed to pa.open() and get_sample_size()
    RATE = 8000  # sample rate; the article notes 8000 is generally enough to recognise speech
    CHANNELS = 1  # channel count for both the input stream and the WAV header
    record_second = 5  # multiplied with RATE / CHUNK to get the number of chunks read
    def record_wav(to_dir=None):
        """Record audio through PyAudio and save it as a WAV file.

        Reads ``int(RATE / CHUNK * record_second)`` chunks of ``CHUNK`` frames
        from an input stream configured with the module-level ``FORMAT``,
        ``CHANNELS`` and ``RATE`` settings.

        Args:
            to_dir: Directory in which the file is created. ``None`` means
                the current directory (``'./'``).

        Returns:
            The path of the written file; the file name is the current local
            time formatted as ``YYYY-mm-dd_HH_MM_SS.wav``.
        """
        if to_dir is None:
            to_dir = './'

        pa = pyaudio.PyAudio()
        try:
            # Query the sample width before the PyAudio instance is terminated.
            sample_width = pa.get_sample_size(FORMAT)

            stream = pa.open(format=FORMAT,
                             channels=CHANNELS,
                             rate=RATE,
                             input=True,
                             frames_per_buffer=CHUNK)
            try:
                chunk_count = int(RATE / CHUNK * record_second)
                frames = [stream.read(CHUNK) for _ in range(chunk_count)]
            finally:
                # Release the stream even if reading fails part-way.
                stream.stop_stream()
                stream.close()
        finally:
            pa.terminate()

        file_name = datetime.now().strftime("%Y-%m-%d_%H_%M_%S") + '.wav'
        # os.path.join also copes with a to_dir that has no trailing separator.
        file_path = os.path.join(to_dir, file_name)

        # The context manager closes the file even if writing fails.
        with wave.open(file_path, 'wb') as wf:
            wf.setframerate(RATE)
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(sample_width)
            wf.writeframes(b''.join(frames))

        return file_path
    
    def text_open_browser(text):
        """Open the site named in *text* in a new browser tab.

        Recognised keywords are "谷歌"/"google" (Google) and "百度"/"baidu"
        (Baidu); Google takes precedence when both appear.

        Args:
            text: Recognised speech as a string; may be empty or ``None``.

        Returns:
            None. Prints ``'no'`` when no keyword is found.
        """
        url = ""
        if text:
            if u"谷歌" in text or 'google' in text:
                url = 'https://www.google.com'
            elif u'百度' in text or 'baidu' in text:
                url = 'https://www.baidu.com'

        # Branch on the resolved URL rather than on the raw text, so that
        # unmatched or missing text never opens a tab with an empty address.
        if url:
            webbrowser.open_new_tab(url)
        else:
            print('no')
    
    def baiduys(object):
        """Send a WAV recording to Baidu's speech API and return the text.

        Args:
            object: Path of the WAV file to recognise. (The name shadows the
                builtin; it is kept so existing callers keep working.)

        Returns:
            The first recognition candidate as ``str``, or ``None`` when the
            service does not report success or the candidate is empty (a
            message is printed in either case).
        """
        import urllib.parse  # local import: only needed to build the token URL

        VOICE_RATE = 8000
        WAVE_FILE = object
        USER_ID = 'joker'
        WAVE_TYPE = 'wav'

        baidu_server = 'https://openapi.baidu.com/oauth/2.0/token?'
        grant_type = 'client_credentials'
        # Credentials of the caller's own Baidu developer application.
        client_id = ''
        client_secret = ''

        # urlencode escapes any special characters in the credentials.
        url = baidu_server + urllib.parse.urlencode({
            'grant_type': grant_type,
            'client_id': client_id,
            'client_secret': client_secret,
        })
        with urllib.request.urlopen(url) as response:
            data = json.loads(response.read())
        token = data['access_token']

        # Read the audio and close the file before any network traffic starts.
        with open(WAVE_FILE, 'rb') as f:
            audio = f.read()

        update = json.dumps({
            "format": WAVE_TYPE,
            "rate": VOICE_RATE,
            "channel": 1,
            'token': token,
            'cuid': USER_ID,
            'speech': base64.b64encode(audio).decode('utf-8'),
            'len': len(audio),
        }).encode('utf-8')
        headers = {'Content-Type': 'application/json'}
        req = urllib.request.Request('https://vop.baidu.com/server_api', update, headers)

        with urllib.request.urlopen(req) as response:
            ans = json.loads(response.read())

        if ans.get('err_msg') != 'success.':
            print(u'错误')
            return None

        # Compare as str: the original compared UTF-8 bytes with '' which is
        # always unequal, so the empty-result branch could never run.
        candidates = ans.get('result') or ['']
        result = candidates[0]
        if result != '':
            return result
        print(u'不存在文件0')
        return None
    
    
    
    
    
    
    
    if __name__ == '__main__':
        # Record a clip, have it recognised, then act on the recognised text.
        to_dir = './'
        file_path = record_wav(to_dir)
        # Resolve the recording against the current working directory. The
        # original prepended a hard-coded, machine-specific Windows path whose
        # backslash-U sequence is an invalid escape (SyntaxError) in Python 3.
        file_path1 = os.path.abspath(file_path)

        text = baiduys(file_path1)
        print(text)

        text_open_browser(text)
    View Code
  • 相关阅读:
    hibernate自动建表时设置编码格式
    【友盟统计报表解读】之错误分析iOS版
    用vs2008和vs2005创建win32 console application
    win7 无法启动此程序,因为计算机中丢失glut32.dll
    visual studio 2005 win7 64位版下载
    win7兼容visual studio 2005 的方法
    OpenGL入门学习(一)(转)--环境搭建
    opengl教程
    glPixelStorei(GL_UNPACK_ALIGNMENT, 1)用法
    OpenGL函数思考-glColor
  • 原文地址:https://www.cnblogs.com/jokerspace/p/6685388.html
Copyright © 2011-2022 走看看