zoukankan      html  css  js  c++  java
  • python3-声音处理

    先来说下二进制读写文件,这需要struct库

    #二进制文件读写
    import struct
    # Round-trip two small integers through a binary file.
    # The "h" format code is a signed 16-bit integer in native byte order,
    # so each value occupies exactly 2 bytes on disk.
    a = 1
    b = -1

    # Write both values back to back; the with-block closes the file even if
    # packing or writing fails.
    with open("bbb.bin", "wb") as f:
        f.write(struct.pack("h", a))
        f.write(struct.pack("h", b))

    # Read the two 2-byte records back in the same order.
    # struct.unpack always returns a tuple, so a and b end up as 1-tuples.
    with open("bbb.bin", "rb") as f:
        b1 = f.read(2)
        a = struct.unpack("h", b1)
        b2 = f.read(2)
        b = struct.unpack("h", b2)
    print(a, b)
    

    然后是通过python创造一段声音

    import wave
    import math
    import struct
    # Generate five seconds of an 800 Hz sine tone and store it as a mono,
    # 16-bit WAV file.
    sr = 8000            # sampling rate: frames per second
    v = 0.5              # volume as a fraction of full scale
    dt = 1.0 / sr        # time between two consecutive samples, in seconds
    count = 5 * sr       # total number of samples (five seconds)

    # Each sample's time is derived from its integer index rather than from a
    # running float sum, so the sample count is exact and no rounding error
    # accumulates in the phase.  With v = 0.5 the values stay well inside the
    # signed 16-bit range.
    samples = [int(math.sin(k * dt * math.pi * 2 * 800) * v * 32768)
               for k in range(count)]

    # The with-block finalizes the WAV header and closes the file even on
    # error; all frames are written in one call instead of one call per sample.
    with wave.open("v0.wav", "w") as f:
        f.setframerate(sr)   # sampling rate
        f.setnchannels(1)    # mono
        f.setsampwidth(2)    # 2 bytes (16 bits) per sample
        f.writeframes(struct.pack("%dh" % count, *samples))

    运用这个原理可以写一个简单的声音文件,比如说两只老虎

    import wave
    import math
    import struct
    # Output file for the melody, configured in a single call:
    # (channels, bytes per sample, frames per second, initial frame count,
    #  compression type, compression name).
    ff = wave.open("v1.wav", "w")
    ff.setparams((1, 2, 8000, 0, "NONE", "not compressed"))
    
    
    
    
    def wv(t=0, f=0, v=0.5, wf=ff, sr=8000):
        '''
        Append a sine tone to an open wave file.

        t: duration to write, in seconds
        f: tone frequency in Hz (0 produces silence)
        v: volume as a fraction of full scale
        wf: a writable wave file; samples are packed as signed 16-bit
            integers, so it is expected to use a sample width of 2
        sr: sampling rate in frames per second
        '''
        # Exactly t*sr samples: no extra trailing sample, and nothing at all
        # for a zero (or negative) duration.
        count = int(round(t * sr))
        if count <= 0:
            return

        step = math.pi * 2 * f / sr   # phase advance per sample
        peak = v * 32768              # scale [-1, 1] to the 16-bit range

        # Sample times come from the integer index, so float error does not
        # accumulate.  Values are clamped because v >= 1 would otherwise
        # produce 32768, which does not fit in a signed 16-bit integer.
        samples = [max(-32768, min(32767, int(math.sin(k * step) * peak)))
                   for k in range(count)]

        # One write for the whole tone instead of one per sample.
        wf.writeframes(struct.pack("%dh" % count, *samples))
    
    # Numbered-notation name -> frequency in Hz; "0" is a rest.
    note = {
        "1": 262, "2": 294, "3": 330, "4": 349,
        "5": 392, "6": 440, "7": 494,
        "6-": 220,
        "0": 0,
    }

    # Melody as note names, one phrase per row.
    n = [
        "1", "2", "3", "1", "1", "2", "3", "1", "0",
        "3", "4", "5", "0", "3", "4", "5", "0",
        "5", "6", "5", "4", "3", "1", "0", "5", "6", "5", "4", "3", "1", "0",
        "2", "6-", "1", "0", "2", "6-", "1",
    ]

    # Length of each note above, in quarter-second units (same row layout).
    tm = [
        2, 2, 2, 2, 2, 2, 2, 2, 1,
        2, 2, 2, 1.5, 2, 2, 2, 2,
        1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1,
        2, 2, 2, 2, 2, 2, 2,
    ]

    # Render every note in order, then finish the file.
    for pitch, beats in zip(n, tm):
        wv(beats / 4.0, note[pitch])

    ff.close()
    # School anthem, rendered to henu.wav
    import wave
    import math
    import struct

    ff = wave.open("henu.wav", "w")
    ff.setframerate(8000)   # sampling rate
    ff.setnchannels(1)      # mono
    ff.setsampwidth(2)      # 2 bytes (16 bits) per sample


    def wv(t=0, f=0, v=0.5, wf=ff, sr=8000):
        '''
        Append a decaying sine tone to an open wave file.

        t: duration to write, in seconds
        f: tone frequency in Hz (0 produces silence)
        v: volume as a fraction of full scale
        wf: a writable wave file; samples are packed as signed 16-bit
            integers, so it is expected to use a sample width of 2
        sr: sampling rate in frames per second
        '''
        # Exactly t*sr samples: no extra trailing sample, nothing for t <= 0.
        count = int(round(t * sr))
        if count <= 0:
            return

        samples = []
        for k in range(count):
            tt = k / float(sr)   # time from the integer index: no float drift
            # Sine wave shaped by an exponential decay so each note fades out.
            s = math.sin(tt * math.pi * 2 * f) * v * math.exp(-2.5 * tt) * 32768
            # Clamp to the signed 16-bit range so large v cannot overflow "h".
            samples.append(max(-32768, min(32767, int(s))))

        # One write for the whole note instead of one per sample.
        wf.writeframes(struct.pack("%dh" % count, *samples))


    # Note name -> frequency in Hz.  "-" and "0" are rests; a trailing "-"
    # lowers and a trailing "+" raises the note by one octave per sign;
    # "#" / "b" mark sharps and flats.
    note = {
        "-": 0, "0": 0,
        "1---": 49, "1#---": 52, "2b---": 52, "2---": 55, "2#---": 58, "3b---": 58,
        "3---": 62, "4---": 65, "4#---": 69, "5b---": 69, "5---": 73, "5#---": 78,
        "6b---": 78, "6---": 82, "6#---": 87, "7b---": 87, "7---": 92,
        "1--": 98, "1#--": 104, "2b--": 104, "2--": 110, "2#--": 117, "3b--": 117,
        "3--": 123, "4--": 131, "4#--": 139, "5b--": 139, "5--": 147, "5#--": 156,
        "6b--": 156, "6--": 165, "6#--": 175, "7b--": 175, "7--": 185,
        "1-": 196, "1#-": 208, "2b-": 208, "2-": 220, "2#-": 233, "3b-": 233,
        "3-": 247, "4-": 262, "4#-": 277, "5b-": 277, "5-": 294, "5#-": 311,
        "6b-": 311, "6-": 330, "6#-": 349, "7b-": 349, "7-": 370,
        "1": 392, "1#": 415, "2b": 415, "2": 440, "2#": 466, "3b": 466,
        "3": 494, "4": 523, "4#": 554, "5b": 554, "5": 587, "5#": 622,
        "6b": 622, "6": 659, "6#": 698, "7b": 698, "7": 740,
        "1+": 784, "1#+": 831, "2b+": 831, "2+": 880, "2#+": 932, "3b+": 932,
        "3+": 988, "4+": 1047, "4#+": 1109, "5b+": 1109, "5+": 1175, "5#+": 1245,
        "6b+": 1245, "6+": 1319, "6#+": 1397, "7b+": 1397, "7+": 1480,
        "1++": 1568, "1#++": 1661, "2b++": 1661, "2++": 1760, "2#++": 1865,
        "3b++": 1865, "3++": 1976, "4++": 2093, "4#++": 2217, "5b++": 2217,
        "5++": 2349, "5#++": 2489, "6b++": 2489,
    }

    # Melody as note names, one bar per row.
    n = [
        "5-", "1", "1", "1", "7-",
        "1", "3", "3", "3", "2",
        "5", "5", "6", "5",
        "4", "3", "2", "-",
        "3", "2", "1", "7-", "6-",
        "7-", "1", "2", "3", "4",
        "6", "5", "4", "3",
        "2", "3", "1", "5", "-",
        "2", "3", "4", "4",
        "6", "6", "5", "4", "3",
        "4", "2", "3", "5",
        "2", "3", "2", "7-", "5-",
        "1", "3", "5", "6", "5", "-",
        "6", "4", "2", "-",
        "2", "3", "4", "2", "3", "-",
        "6-", "7-", "1", "-",
    ]

    # Length of each note above, in half-second units (same row layout).
    tm = [
        2, 2, 2, 1, 1,
        2, 2, 2, 1, 1,
        3, 1, 2, 2,
        2, 2, 2, 2,
        3, 1, 1, 1, 2,
        3, 1, 1, 1, 2,
        2, 2, 2, 2,
        2, 1, 1, 2, 2,
        3, 1, 2, 2,
        3, 1, 1, 1, 2,
        2, 2, 2, 2,
        3, 1, 1, 1, 2,
        1, 1, 1, 1, 2, 2,
        2, 2, 2, 2,
        1, 1, 1, 1, 2, 2,
        3, 1, 3, 4,
    ]

    # Render every note in order; always finalize the file, even on error.
    try:
        for pitch, beats in zip(n, tm):
            wv(beats / 2.0, note[pitch])
    finally:
        ff.close()

    接下来是稍微进阶一点的内容:用matplotlib把声音文件的波形一段一段地显示出来

    import matplotlib.pyplot as plt
    import wave
    import struct

    # Animate the waveform of henu.wav, one chunk of frames at a time.
    fig = plt.figure()
    # add_subplot(3, 4, 9) would split the canvas into 3 rows x 4 columns and
    # draw in the 9th cell (left to right, top to bottom); here a single cell.
    ax = fig.add_subplot(1, 1, 1)
    # Interactive mode: drawing calls do not block the script.
    plt.ion()
    read_size = 200     # frames drawn per step
    pass_f = 0          # number of leading chunks (of read_size frames) to skip
    draw_channel = 0    # index of the channel to draw
    stay_time = 1.0     # seconds each chunk stays on screen

    file = wave.open("henu.wav", "r")
    try:
        frame_rate = file.getframerate()
        frames = file.getnframes()       # total number of frames in the file
        channels = file.getnchannels()   # number of interleaved channels
        if file.getsampwidth() != 2:
            # Samples are decoded below as signed 16-bit integers ("h").
            raise ValueError("only 16-bit wave files are supported")
        draw_channel = draw_channel % channels
        i = 0
        x_data = [x / read_size for x in range(read_size)]
        frame_bytes = 2 * channels

        while i < frames:
            fs = file.readframes(read_size)
            i += read_size
            if pass_f > 0:
                pass_f -= 1
                continue

            # The last chunk can hold fewer than read_size frames, so build
            # the unpack format from what was actually read.
            got = len(fs) // frame_bytes
            if got == 0:
                break
            # f_data holds the samples of all channels, interleaved.
            f_data = struct.unpack("h" * (got * channels), fs[:got * frame_bytes])
            # Keep only the selected channel and scale to [-1, 1).
            y_data = [sample / 32768.0 for sample in f_data[draw_channel::channels]]

            lines = ax.plot(x_data[:got], y_data, 'g-', lw=1)
            plt.pause(stay_time)
            # Remove the curve through the artist itself; mutating ax.lines
            # directly is not supported by current matplotlib releases.
            lines[0].remove()
    finally:
        file.close()

    下面用机器学习的方法拟合一张图片里的曲线:先从位图中提取曲线上的点,再用TensorFlow训练一组正弦函数的加权和去逼近这些点

    #训练一个多项式,来拟合一条曲线
    from PIL import Image
    import matplotlib.pyplot as plt
    import numpy as np
    import random
    import math
    import tensorflow as tf
    
    
    pix2 = math.pi * 2

    # Load the picture as an integer array of shape (height, width, channels);
    # the author's sample image was (249, 277, 3).
    img = np.array(Image.open("./p2.bmp"), dtype=np.int32)
    data = []
    # Sum of the three colour channels; small sums are dark pixels.
    a = img[:, :, 0] + img[:, :, 1] + img[:, :, 2]
    h, w, _ = np.shape(img)
    for j in range(w):
        for i in range(h):
            if a[i, j] < 200:
                # x is the column scaled by the width; y is measured from
                # row 125 (the vertical middle of the author's 249-row image)
                # and flipped so that up is positive.
                data.append([j * 1.0 / w, (125.0 - i) * 2.0 / w])
    data_len = len(data)
    # Model: y = a1*sin(x*b1) + a2*sin(x*b2) + a3*sin(x*b3) + ...

    # Plot the extracted points.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.scatter([p[0] for p in data], [p[1] for p in data], linewidths=1)
    plt.ion()
    #######################################################
    # Model (TensorFlow 1.x graph API: placeholders and an explicit session)
    w_size = 8        # number of sine terms
    batch_size = 64

    x = tf.placeholder(dtype=tf.float32, shape=[None, 1])
    y = tf.placeholder(dtype=tf.float32, shape=[None, 1])
    # Amplitude of each sine term.  NOTE: from here on w is this variable,
    # no longer the image width.
    w = tf.Variable(tf.truncated_normal(shape=[w_size, 1], mean=0.0, stddev=2 / w_size, dtype=tf.float32))
    # Angular frequency of each term, initialized to 2*pi*(k+1) and trainable.
    c = tf.Variable(tf.constant([[(i + 1) * 2 * math.pi for i in range(w_size)]], tf.float32))
    res = tf.matmul(tf.sin(tf.matmul(x, c)), w)
    loss = tf.reduce_mean(tf.abs((res - y)))

    train_step = tf.train.AdamOptimizer(0.02).minimize(loss)
    init = tf.global_variables_initializer()

    # Evaluation grid for drawing the fitted curve; it never changes.
    x_data = [[j / 500.0] for j in range(500)]

    with tf.Session() as sess:
        sess.run(init)
        for i in range(1, 10000):
            # Draw a random mini-batch; randrange covers every point,
            # including index 0.
            rnddata = [random.randrange(data_len) for _ in range(batch_size)]
            x_d = [[data[p][0]] for p in rnddata]
            y_d = [[data[p][1]] for p in rnddata]
            ls, st = sess.run([loss, train_step], feed_dict={x: x_d, y: y_d})
            print(i, ":", ls)

            if i % 100 == 0:
                # Every 100 steps show the current fit for one second.
                y_data, ww, cc = sess.run([res, w, c], feed_dict={x: x_data})
                print(ww)
                print(cc)
                lines = ax.plot([j[0] for j in x_data], [j[0] for j in y_data], 'r-', lw=1)
                plt.pause(1)
                # Remove the curve through the artist itself; mutating
                # ax.lines directly is not supported by current matplotlib.
                lines[0].remove()
            
        
        
        
        
        

    最后是输入一段文字,让程序把它读出来;前提是要有汉字与拼音的对照表,以及每个拼音对应的发音文件(wav)

    # Build a table mapping each Chinese character to its pinyin, written with
    # plain letters plus a trailing tone digit (0 = neutral tone).
    t2pinyin = {}
    yunmu = "āáǎàōóǒòēéěèīíǐìūúǔùǖǘǚǜ"     # tone-marked vowels
    yindiao = "123412341234123412341234"   # tone number of each marked vowel
    rp = "aaaaooooeeeeiiiiuuuuvvvv"        # unmarked replacement ("v" stands for ü)
    # Reads ./pinyin1/h1.txt ... h9.txt.
    # NOTE(review): the original comment mentions ten files, but range(1,10)
    # covers nine -- confirm against the data directory.
    for i in range(1, 10):
        fn = "./pinyin1/h" + str(i) + ".txt"
        with open(fn, "r", encoding="utf-8") as py:
            line1 = py.readline()   # first line: the characters
            line2 = ""
            # The pinyin line is taken to be the next line longer than 10
            # characters; stop at end of file instead of looping forever.
            while len(line2) <= 10:
                line2 = py.readline()
                if not line2:
                    break

        # Drop the newline and any spaces from the character line.
        line1 = "".join(line1.split())
        # Split the pinyin line on whitespace.  (Stripping the spaces first
        # and then splitting on " " would leave a single token.)
        pinyin = line2.replace("ü", "v").split()

        # Pair each character with the syllable in the same position.
        for word, pyj in zip(line1, pinyin):
            for k, marked in enumerate(yunmu):
                if marked in pyj:
                    # Replace the marked vowel and append the tone number.
                    pyj = pyj.replace(marked, rp[k]) + yindiao[k]
                    break
            else:
                # No tone mark found: neutral tone.
                pyj = pyj + "0"
            t2pinyin[word] = pyj
    print(t2pinyin["大"], t2pinyin["家"], t2pinyin["好"])

    # Speak a sentence by concatenating one recorded wav file per character.
    s = "我在学拍森"
    import wave
    wf = wave.open("py.wav", "w")
    # NOTE(review): these settings are presumably chosen to match the
    # recordings under ./pinyin/ -- confirm, since frames are copied verbatim.
    wf.setframerate(16000)   # sampling rate
    wf.setnchannels(2)       # number of channels
    wf.setsampwidth(2)       # bytes per sample

    try:
        for w in s:
            # Characters without a known reading are skipped.
            if w in t2pinyin:
                vv = t2pinyin[w]
                file_name = "./pinyin/" + vv + ".wav"
                # The with-block closes the recording even if reading fails.
                with wave.open(file_name, "r") as speak_file:
                    # Copy every frame of the recording.
                    data = speak_file.readframes(speak_file.getnframes())
                wf.writeframes(data)
    finally:
        # Always finalize the output header, even if a recording is missing.
        wf.close()
  • 相关阅读:
    给列表单元格加背景色
    Log4j最简入门及实例
    MySQL 按指定字段自定义列表排序
    MySQL逗号分割字段的列转行
    利用MySQL统计一列中不同值的数量方法示例
    使用docx4j编程式地创建复杂的Word(.docx)文档
    向Docx4j生成的word文档中添加布局--第二部分
    向Docx4j生成的word文档添加图片和布局--第一部分
    使用Docx4j创建word文档
    [简单]docx4j常用方法小结
  • 原文地址:https://www.cnblogs.com/wpbing/p/9322872.html
Copyright © 2011-2022 走看看