zoukankan      html  css  js  c++  java
  • 文本转音频(百度语音合成api)(python)(原创)

    迄今为止,程序调用成功次数统计图表(2019-05-20)

    - - - - - - - - - - - 分割线 - - - - - - - - - - 

      应之前的一家小学教育培训机构的要求设计的一款 将文字转音频的程序。(注:后面应该是生成音频才对,没有改过来)

    技术难点:

    ①语音合成,如果没有现在这么多的云服务-百度云语音合成,我估计这个程序会费很大的功夫才能完成。

    1,UI界面设计(精简设计),使用tk框架

    功能说明:
    1,文本标记功能整合进程序中;
    2,联网的http API接口整合;
    1,网络检查;
    2,断网异常检测;
    3,网络传数据、接数据;
    3,文本转语音的baidu-api接口整合
    1,语速,取值0-9,默认为5中语速;
    2,音调,取值0-9,默认为5中语调;
    3,音量,取值0-15,默认为5中音量;
    4,发音人选择, 0为女声,1为男声,3为情感合成-度逍遥,4为情感合成-度丫丫,默认为普通女;
    4,音频保存自主设置;

    注:其中的逗号、句号重复是甲方要求我实现的功能

    2,文件详情:

    其中 Python35_64 为带所需库的python3.5 64位的文件,整个程序使用 双击启动文本标记程序.bat 进行启动

    3,文字转音频使用百度云的api接口实现:

    def audio_out(string_,path,audio_set=None):
      #百度云网络网络配置
      APP_ID = '14636839'
      API_KEY = 'u34cdKCsR61tzM0r20Qnp3YM'
      SECRET_KEY = 'Rehs5G02EqbrRDDgGdPY9NKY2ZlCX8Oq'
      client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
      if len(string_)>500:
        num=int(len(string_)/500)
        #第一段音频
        result  = client.synthesis(string_[0:500], 'zh', 1, {'spd':audio_set[0],'pit':audio_set[1],'vol': audio_set[2],'per':audio_set[3]})
        open(path, 'wb').write(result)
        #中间段落的音频
        for i in range(1,num):
          result  = client.synthesis(string_[i*500:(i+1)*500], 'zh', 1, {'spd':audio_set[0],'pit':audio_set[1],'vol': audio_set[2],'per':audio_set[3]})
          open(path, 'ab').write(result)
        #最后一段音频
        result  = client.synthesis(string_[num*500:], 'zh', 1, {'spd':audio_set[0],'pit':audio_set[1],'vol': audio_set[2],'per':audio_set[3]})
        open(path, 'ab').write(result)
      else:
        result = client.synthesis(string_, 'zh', 1, {'spd':audio_set[0],'pit':audio_set[1],'vol': audio_set[2],'per':audio_set[3]})
        open(path, 'wb').write(result)

    注:由于百度云的语音合成api最大可以支持500个字符长度,这里使用提前截取的方法来进行连续上传数据返回音频,之后进行音频数据的首尾相加。

    4,完整的音频处理文件

    #文字转音频程序and联合文字处理程序
    # encoding:utf-8
    # coding by Mufasa 2018.11.09
    
    '''
    功能说明:
    1,文本标记功能整合进程序中;
    2,联网的http API接口整合;
      1,网络检查;
      2,断网异常检测;
      3,网络传数据、接数据;
    3,文本转语音的baidu-api接口整合
      1,语速,取值0-9,默认为5中语速;
      2,音调,取值0-9,默认为5中语调;
      3,音量,取值0-15,默认为5中音量;
      4,发音人选择, 0为女声,1为男声,3为情感合成-度逍遥,4为情感合成-度丫丫,默认为普通女;
    4,音频保存自主设置;
    '''
    
    
    import os
    import tkinter.filedialog
    import shutil
    import tkinter as tk
    from aip import AipSpeech
    
    
    def audio_out(string_,path,audio_set=None):
      #百度云网络网络配置
      APP_ID = '14636839'
      API_KEY = 'u34cdKCsR61tzM0r20Qnp3YM'
      SECRET_KEY = 'Rehs5G02EqbrRDDgGdPY9NKY2ZlCX8Oq'
      client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
      if len(string_)>500:
        num=int(len(string_)/500)
        #第一段音频
        result  = client.synthesis(string_[0:500], 'zh', 1, {'spd':audio_set[0],'pit':audio_set[1],'vol': audio_set[2],'per':audio_set[3]})
        open(path, 'wb').write(result)
        #中间段落的音频
        for i in range(1,num):
          result  = client.synthesis(string_[i*500:(i+1)*500], 'zh', 1, {'spd':audio_set[0],'pit':audio_set[1],'vol': audio_set[2],'per':audio_set[3]})
          open(path, 'ab').write(result)
        #最后一段音频
        result  = client.synthesis(string_[num*500:], 'zh', 1, {'spd':audio_set[0],'pit':audio_set[1],'vol': audio_set[2],'per':audio_set[3]})
        open(path, 'ab').write(result)
      else:
        result = client.synthesis(string_, 'zh', 1, {'spd':audio_set[0],'pit':audio_set[1],'vol': audio_set[2],'per':audio_set[3]})
        open(path, 'wb').write(result)
        
    
    def isConnected():
      import requests
      try:
        html = requests.get("http://www.baidu.com",timeout=2)
      except:
        return False
      return True
    
    
    
    def out_comma(data,num_1,num_2):
      txt=''
      if isinstance(data,(list)):#有不止一段
        for i in data:
          for n in range(num_1):
            txt=txt+i
        for n in range(num_2):
          for i in data:
            txt=txt+i
      else:#如果不是数组,即只有一个字符串
        for n in range(num_2):
          txt=txt+data
      return txt
    
    def code_a(path,data,set,audio_set):
      txt_mid=data.split('...')
      for i in range(len(txt_mid)):
        txt_mid[i]=txt_mid[i].split(',,,')
      #上面是对文本进行切片,下面进行相关处理
      txt=''
      for i in txt_mid:
        txt=txt+out_comma(i,set[0],set[1])
      for i in range(set[2]):
        for j in txt_mid:
          for k in j:
            txt=txt+k
      # import pyperclip
      # pyperclip.copy(txt)
      if path=='':
        import pyperclip
        pyperclip.copy(txt)
      else:
        fobj=open(path+'.txt','w')
        fobj.write(txt)
        fobj.close()
        #音频生成,需要额外读取audio_set数据
        
        audio_out(txt,path+'.mp3',audio_set)
    
    def code_b(path,data,set,audio_set):
      txt_mid=data.split('...')
      for i in range(len(txt_mid)):
        txt_mid[i]=txt_mid[i].split(',,,')
      txt_last=[]
      for i in txt_mid:
        for j in i:
          txt_last.append(j)
      # print(txt_last)
      # for i in txt_last:
        # print(i)
      # print('文本输出完毕')#这里是有。标点的
      txt_mark=[]
      txt_midd=data[:2]
      for i in data[2:]:
        txt_midd=txt_midd+i
        if txt_midd==',,,':
          txt_mark.append(0)
        elif txt_midd=='...':
          txt_mark.append(1)
        txt_midd=txt_midd[1:]
      # print(len(txt_last))#98
      # print(len(txt_mark))#97
      txt=''
      for circle in range(set[2]):
        for i in range(len(txt_mark)):
          if txt_mark[i]==0:
            for j in range(set[0]):
              txt+=txt_last[i]
              
          elif txt_mark[i]==1:
            for j in range(set[1]):
              txt+=txt_last[i]
      
      if path=='':
        import pyperclip
        pyperclip.copy(txt)
      else:
        fobj=open(path+'.txt','w')
        fobj.write(txt)
        fobj.close()
        #音频生成,需要额外读取audio_set数据
        audio_out(txt,path+'.mp3',audio_set)
    
    
    class assist():
      def write_txt_data(data,path):
        ls = os.linesep
        #data 为数组类型
        fobj = open(path,'w')
        fobj.writelines(['%s%s'%(x,ls)for x in data])
        fobj.close()
      def read_txt_data(path):
        fobj = open(path,'r')
        string = []
        for eachline in fobj:
          if eachline[:-1] != '':
            string.append(eachline[:-1])
        fobj.close()
        return string
    
    
    
    class out_setconfig(tk.Toplevel):
      def __init__(self):
        super().__init__()
        self.title('参数设置')# 弹窗界面
        self.data_set_path = os.path.dirname(os.path.realpath(__file__))+"\data_set.txt"
        isExists = os.path.exists(self.data_set_path )
        if isExists == False:     #bug,之前没有设置
          self.setup_eror()
        else:
          self.setup_UI()
    
      def setup_eror(self):
        row1 = tk.Frame(self)
        row1.pack(fill="x")
        tk.Label(row1, text='程序第一次运行!
    
    已进行参数初始化操作
    ').pack(side=tk.LEFT)
        row4 = tk.Frame(self)
        row4.pack(fill="x")
        assist.write_txt_data(['1','2','2','2','1','3','1','2','5','5','5','0'],self.data_set_path )
        
        
        tk.Button(row4, text="关闭窗口", command=self.cancel).pack(side=tk.RIGHT)
      def setup_UI(self):
        
        self.A1 = tk.StringVar()
        self.A2 = tk.StringVar()
        self.A3 = tk.StringVar()
        self.A0 = tk.IntVar()
        
        self.B1 = tk.StringVar()
        self.B2 = tk.StringVar()
        self.B3 = tk.StringVar()
        self.B0 = tk.IntVar()
        
        self.C0 = tk.StringVar()
        self.C1 = tk.StringVar()
        self.C2 = tk.StringVar()
        self.C3 = tk.StringVar()
        
        data = assist.read_txt_data(self.data_set_path)
        self.A1.set(data[1])
        self.A2.set(data[2])
        self.A3.set(data[3])
        d = int(data[0])
        self.A0.set(d)
        
        self.B1.set(data[5])
        self.B2.set(data[6])
        self.B3.set(data[7])
        d = int(data[4])
        self.B0.set(d)
        
        self.C0.set(data[8])
        self.C1.set(data[9])
        self.C2.set(data[10])
        self.C3.set(data[11])
        
        row1 = tk.Frame(self)
        row1.pack(fill="x")
        
        tk.Label(row1,text='程序A',width=30 ).grid(row=0,column=0,columnspan=2)
        tk.Label(row1,text='逗号次数:',width=15).grid(row=1,column=0)
        tk.Label(row1,text='句号次数:',width=15).grid(row=2,column=0)
        tk.Label(row1,text='全部文本次数',width=15).grid(row=3,column=0)
        tk.Label(row1,text='是否生成文本',width=15).grid(row=4,column=0)
        
        tk.Entry(row1, textvariable=self.A1, width=13).grid(row=1,column=1)
        tk.Entry(row1, textvariable=self.A2, width=13).grid(row=2,column=1)
        tk.Entry(row1, textvariable=self.A3, width=13).grid(row=3,column=1)
        checkA = tk.Checkbutton(row1, text="播放", variable=self.A0)
        checkA.select()
        checkA.grid(column=1, row=4, sticky=tk.W)
        
        
        tk.Label(row1,text='程序B',width=30 ).grid(row=0,column=2,columnspan=2)
        tk.Label(row1,text='逗号次数:',width=15).grid(row=1,column=2)
        tk.Label(row1,text='句号次数:',width=15).grid(row=2,column=2)
        tk.Label(row1,text='全部文本次数',width=15).grid(row=3,column=2)
        tk.Label(row1,text='是否生成文本',width=15).grid(row=4,column=2)
        
        tk.Entry(row1, textvariable=self.B1, width=13).grid(row=1,column=3)
        tk.Entry(row1, textvariable=self.B2, width=13).grid(row=2,column=3)
        tk.Entry(row1, textvariable=self.B3, width=13).grid(row=3,column=3)
        checkB = tk.Checkbutton(row1, text="播放", variable=self.B0)
        checkB.select()
        checkB.grid(column=3, row=4, sticky=tk.W)
        
        self.A0.set(int(data[0]))
        self.B0.set(int(data[4]))
        
        #这个是音频合成设置界面的
        tk.Label(row1,text='音频合成参数设置',width=30 ).grid(row=0,column=4,columnspan=2)
        tk.Label(row1,text='语速:',width=15).grid(row=1,column=4)
        tk.Label(row1,text='音调:',width=15).grid(row=2,column=4)
        tk.Label(row1,text='音量',width=15).grid(row=3,column=4)
        tk.Label(row1,text='发音人',width=15).grid(row=4,column=4)
        
        tk.Entry(row1, textvariable=self.C0, width=13).grid(row=1,column=5)
        tk.Entry(row1, textvariable=self.C1, width=13).grid(row=2,column=5)
        tk.Entry(row1, textvariable=self.C2, width=13).grid(row=3,column=5)
        tk.Entry(row1, textvariable=self.C3, width=13).grid(row=4,column=5)
        
        tk.Button(row1, text="取消", command=self.cancel, width=13).grid(row=9,column=0)
        tk.Button(row1, text="确定", command=self.ok, width=13).grid(row=9,column=4)
        tk.Button(row1, text="重置", command=self.reset, width=13).grid(row=9,column=5)
        
      def ok(self):
        self.userinfo_A = [self.A0.get(), self.A1.get(),self.A2.get(),self.A3.get()] # 设置数据
        self.userinfo_B = [self.B0.get(), self.B1.get(),self.B2.get(),self.B3.get()] # 设置数据
        self.userinfo_C = [self.C0.get(), self.C1.get(),self.C2.get(),self.C3.get()]
        
        self.userinfo_A[0] = str(self.userinfo_A[0])
        self.userinfo_B[3] = str(self.userinfo_B[3])
        
        
        data = self.userinfo_A + self.userinfo_B+self.userinfo_C
        # print(data)
        assist.write_txt_data(data,os.path.dirname(os.path.realpath(__file__))+"\data_set.txt")
        self.destroy() # 销毁窗口
      def cancel(self):
        self.userinfo = None # 空!
        self.destroy()
      
      def reset(self):
        assist.write_txt_data(['1','2','2','2','1','3','1','2','5','5','5','0'],self.data_set_path )
        self.destroy()
        
    ################################
    class main_code(tk.Tk):
      def __init__(self):
        super().__init__()
        self.audio_path = ""
        self.run = True
        self.vartext = tk.StringVar()
        
        self.txt=''
        networkisok=isConnected()
        if networkisok:
          self.mark_text = '网络连接正常!
    欢迎使用!'
        else:
          self.mark_text = '无网络连接!
    请确认网络连接是否完好
    无法连接百度云服务器'
        
        self.insert_text = ''
        self.vartext.set(self.mark_text)
        self.audio_path = ""
        self.title('音频标记')
        self.setupUI()
        
      def setupUI(self):
        
        row4 = tk.Frame(self)
        row4.pack(fill="x")
        
        tk.Button(row4,text='导入文本',width=27,command=self.select).grid(row=0,column=0)
        tk.Button(row4,text='设置文本',width=27,command=self.setup_config).grid(row=0,column=1)
        tk.Button(row4,text='生成文本',width=27,command=lambda:self.lay(entry1.get())).grid(row=0,column=2)
        
        label = tk.Label(row4,height=3,textvariable=self.vartext, bg='white', font=('黑体', 10), anchor='w').grid(row=1,column=0,columnspan=3)
        
        entry1 = tk.Entry(row4,width=85)
        entry1.grid(row=2,column=0,columnspan=3)
    
      def select(self):   #获取audio_path和audio_len
        self.audio_path = tk.filedialog.askopenfilename(initialdir = 'C:\Users\Administrator\Desktop',filetypes=( ("txt or docx files", "*.txt;*.docx"),("All files", "*.*")))
        if self.audio_path=="":
          self.vartext.set('未导入任何文件')
        else:
          p,f=os.path.split(self.audio_path)
          path = os.path.dirname(os.path.realpath(__file__)) + '\data\' + str(os.path.splitext(f)[0])
          # print(path)
          isExists = os.path.exists(path)
          if not isExists:  #如果不存在这个目录
            os.makedirs(path)  #创建一个新的路径
            shutil.copyfile(self.audio_path,path+'\'+f)
            
          if str(os.path.splitext(f)[1])=='.txt':
            file_object = open(self.audio_path,'r', encoding='UTF-8')
            try:
              file_context = file_object.read()
            finally:
              file_object.close()
            self.txt=file_context
            self.vartext.set('已导入txt文本')
          # print(1)
          elif str(os.path.splitext(f)[1])=='.docx':
            import docx
            document = docx.Document(self.audio_path)
            txt=''
            for paragraph in document.paragraphs:
              txt+=paragraph.text
            self.txt=txt
            self.vartext.set('已导入docx文本')
          
          self.audio_path=path+'\'+f#更新数据路径
          # print(self.audio_path)
        
      def lay(self,txt=''):
        mark_set=assist.read_txt_data(os.path.dirname(os.path.realpath(__file__))+"\data_set.txt")
        # print(mark_set)
        
        for i in range(len(mark_set)):
          mark_set[i]=int(mark_set[i])
        
        # mark_set=[1,1,0,1,0,0,1,4]
        if txt=='':
          if self.audio_path!='':
            p,f=os.path.split(self.audio_path)
            # print(p)
            if mark_set[0]==1:
              # code_a(p+'\'+str(os.path.splitext(f)[0])+'_A'+'.txt',self.txt,mark_set[1:4])
              code_a(p+'\'+str(os.path.splitext(f)[0])+'_A',self.txt,mark_set[1:4],mark_set[8:12])
              # self.vartext.set('已生成文本标记文件A')
            if mark_set[4]==1:
              # code_b(p+'\'+str(os.path.splitext(f)[0])+'_B'+'.txt',self.txt,mark_set[5:])
              code_b(p+'\'+str(os.path.splitext(f)[0])+'_B',self.txt,mark_set[5:],mark_set[8:12])
              # self.vartext.set('已生成文本标记文件B')
            if mark_set[0]==1 and mark_set[4]==1:
              self.vartext.set('已生成文本标记文件A、B和相应的音频')
            elif mark_set[0]==1 and mark_set[4]==0:
              self.vartext.set('已生成文本标记文件A和相应的音频')
            elif mark_set[0]==0 and mark_set[4]==1:
              self.vartext.set('已生成文本标记文件B和相应的音频')
            elif mark_set[0]==0 and mark_set[4]==0:
              self.vartext.set('设置中未确定生成何种文件
    请在设置文本中调整参数!')
          else:
            self.vartext.set('未导入任何文件')
        else:
          # print(txt)
          if mark_set[0]==1:
            code_a('',txt,mark_set[1:4])
            self.vartext.set('已将A程序处理后的文字复制到剪切板
    直接粘贴即可
    如果想使用B程序处理请在设置中将程序A取消,并勾选程序B')
          elif mark_set[4]==1:
            code_b('',txt,mark_set[5:])
            self.vartext.set('已将B程序处理后的文字复制到剪切板
    直接粘贴即可
    如果想使用A程序处理请在设置中将程序B取消,并勾选程序A')
        
        
      def setup_config(self):
        res = self.ask_userinfo()
        if res is None: return
      def ask_userinfo(self):
        inputDialog = out_setconfig()
        self.wait_window(inputDialog) # 这一句很重要!!!
    
    if __name__ == '__main__':
      app = main_code()
      app.mainloop()
  • 相关阅读:
    CentOS7上Mongodb安装、卸载
    CentOS7上Redis安装与配置
    vmware centos7系统虚拟机复制到其他电脑后不能联网问题解决
    流程项目点水笔记
    CentOS7图形界面启动报错unable to connect to X server
    本地Chrome测试JS代码报错:XMLHttpRequest cannot load
    iptables相关操作以及简单理解端口和服务之间关系
    git revert 和 git reset的区别
    Linux中_ALIGN宏背后的原理——内存对齐
    SPI协议及其工作原理浅析
  • 原文地址:https://www.cnblogs.com/Mufasa/p/10474542.html
Copyright © 2011-2022 走看看