zoukankan      html  css  js  c++  java
  • mfcc的特征提取python 代码实现和解析

     1 #!/usr/bin/python
     2 # -*- coding: UTF-8 -*-
     3 
     4 import numpy
     5 import scipy.io.wavfile
     6 from matplotlib import pyplot as plt
     7 from scipy.fftpack import dct
     8 
     9 sample_rate,signal=scipy.io.wavfile.read('stop.wav')
    10 
    11 print(sample_rate,len(signal))
    12 #读取前3.5s 的数据
    13 signal=signal[0:int(3.5*sample_rate)]
    14 print(signal)
    15 
    16 
    17 
    18 #预先处理
    19 pre_emphasis = 0.97
    20 emphasized_signal = numpy.append(signal[0], signal[1:] - pre_emphasis * signal[:-1])
    21 
    22 
    23 frame_size=0.025
    24 frame_stride=0.1
    25 frame_length,frame_step=frame_size*sample_rate,frame_stride*sample_rate
    26 signal_length=len(emphasized_signal)
    27 frame_length=int(round(frame_length))
    28 frame_step=int(round(frame_step))
    29 num_frames=int(numpy.ceil(float(numpy.abs(signal_length-frame_length))/frame_step))
    30 
    31 
    32 pad_signal_length=num_frames*frame_step+frame_length
    33 z=numpy.zeros((pad_signal_length-signal_length))
    34 pad_signal=numpy.append(emphasized_signal,z)
    35 
    36 
    37 indices = numpy.tile(numpy.arange(0, frame_length), (num_frames, 1)) + numpy.tile(numpy.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
    38 
    39 frames = pad_signal[numpy.mat(indices).astype(numpy.int32, copy=False)]
    40 
    41 #加上汉明窗
    42 frames *= numpy.hamming(frame_length)
    43 # frames *= 0.54 - 0.46 * numpy.cos((2 * numpy.pi * n) / (frame_length - 1))  # Explicit Implementation **
    44 
    45 #傅立叶变换和功率谱
    46 NFFT = 512
    47 mag_frames = numpy.absolute(numpy.fft.rfft(frames, NFFT))  # Magnitude of the FFT
    48 #print(mag_frames.shape)
    49 pow_frames = ((1.0 / NFFT) * ((mag_frames) ** 2))  # Power Spectrum
    50 
    51 
    52 
    53 low_freq_mel = 0
    54 #将频率转换为Mel
    55 nfilt = 40
    56 high_freq_mel = (2595 * numpy.log10(1 + (sample_rate / 2) / 700))
    57 mel_points = numpy.linspace(low_freq_mel, high_freq_mel, nfilt + 2)  # Equally spaced in Mel scale
    58 hz_points = (700 * (10**(mel_points / 2595) - 1))  # Convert Mel to Hz
    59 
    60 bin = numpy.floor((NFFT + 1) * hz_points / sample_rate)
    61 
    62 fbank = numpy.zeros((nfilt, int(numpy.floor(NFFT / 2 + 1))))
    63 
    64 for m in range(1, nfilt + 1):
    65     f_m_minus = int(bin[m - 1])   # left
    66     f_m = int(bin[m])             # center
    67     f_m_plus = int(bin[m + 1])    # right
    68     for k in range(f_m_minus, f_m):
    69         fbank[m - 1, k] = (k - bin[m - 1]) / (bin[m] - bin[m - 1])
    70     for k in range(f_m, f_m_plus):
    71         fbank[m - 1, k] = (bin[m + 1] - k) / (bin[m + 1] - bin[m])
    72 filter_banks = numpy.dot(pow_frames, fbank.T)
    73 filter_banks = numpy.where(filter_banks == 0, numpy.finfo(float).eps, filter_banks)  # Numerical Stability
    74 filter_banks = 20 * numpy.log10(filter_banks)  # dB
    75 
    76 num_ceps = 12
    77 mfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, 1 : (num_ceps + 1)]
    78 (nframes, ncoeff) = mfcc.shape
    79 
    80 n = numpy.arange(ncoeff)
    81 cep_lifter =22
    82 lift = 1 + (cep_lifter / 2) * numpy.sin(numpy.pi * n / cep_lifter)
    83 mfcc *= lift  #*
    84 
    85 #filter_banks -= (numpy.mean(filter_banks, axis=0) + 1e-8)
    86 mfcc -= (numpy.mean(mfcc, axis=0) + 1e-8)
    87 
    88 print(mfcc.shape)
    89 plt.plot(filter_banks)
    90 
    91 plt.show()

    测试结果:

  • 相关阅读:
    2019.6.20刷题统计
    36 线程 队列 守护线程 互斥锁 死锁 可重入锁 信号量
    35 守护进程 互斥锁 IPC 共享内存 的方式 生产者消费者模型
    34 进程 pid ppid 并发与并行,阻塞与非阻塞 join函数 process对象 孤儿进程与僵尸进程
    33 udp 域名 进程
    32 粘包 文件传输
    31 socket客户端. 服务器 异常 语法
    30 网络编程
    29 元类 异常
    26 封装 反射 常用内置函数
  • 原文地址:https://www.cnblogs.com/dylancao/p/9790707.html
Copyright © 2011-2022 走看看