zoukankan      html  css  js  c++  java
  • unimrcp-voice-activity语音检测

    研究unimrcp有一段时间了,其中unimrcp的voice activity(语音活动检测)算法经常遭到大家吐槽。今天我们简单介绍一下unimrcp voice activity这个简单粗暴的算法:

    unimrcp 语音活动检测是通过能量来控制的,设定几个常量:

    /* Configuration and running state of the level-based activity detector */
    struct mpf_activity_detector_t {
    /* Level threshold: a frame whose level is >= this value counts as activity */
    apr_size_t level_threshold;
    
    /* How long the level must stay at/above the threshold before switching to the activity state */
    apr_size_t speech_timeout;
    /* How long the level must stay below the threshold before switching to the inactivity state */
    apr_size_t silence_timeout;
    /* How long the detector may stay inactive before a no-input event is raised */
    apr_size_t noinput_timeout;
    
    /* Current detector state */
    mpf_detector_state_e state;
    /* Time accumulated in the current state (advanced by CODEC_FRAME_TIME_BASE per frame) */
    apr_size_t duration;
    };

    来看一下这几个参数的初始化的值,根据实际的测试,我们后期做过改动:

    /**
     * Create activity detector.
     *
     * Allocates the detector from @a pool and fills in the default settings.
     * The timeouts are compared against a duration that grows by
     * CODEC_FRAME_TIME_BASE per processed frame; the values below are
     * annotated as milliseconds.
     *
     * @param pool the pool to allocate the detector from
     * @return the new detector, or NULL if the pool allocation failed
     */
    MPF_DECLARE(mpf_activity_detector_t*) mpf_activity_detector_create(apr_pool_t *pool)
    {
        mpf_activity_detector_t *detector = apr_palloc(pool,sizeof(*detector));
        /* apr_palloc() can return NULL when the pool has no abort function */
        if(detector) {
            /* NOTE(review): originally annotated "0 .. 255", but the level it is
             * compared with is a mean of 16-bit sample magnitudes, so larger
             * values are possible -- confirm the intended scale. */
            detector->level_threshold = 50;
            detector->speech_timeout = 300;   /* 300 ms = 0.3 s */
            detector->silence_timeout = 1000; /* 1000 ms = 1 s */
            detector->noinput_timeout = 5000; /* 5000 ms = 5 s */
            detector->duration = 0;
            detector->state = DETECTOR_STATE_INACTIVITY;
        }
        return detector;
    }

    看一下重要的函数,能量的计算:

    该函数把一帧内所有采样值的绝对值直接累加,再除以采样个数取平均,作为这一帧的能量值。由于只看平均幅度,在有噪音的环境下这个算法基本不可用。后面将会介绍如何采用webrtc的VAD(voice activity detection)来取代这个算法。

    /**
     * Calculate the activity level of a frame.
     *
     * The codec frame buffer is read as signed 16-bit samples; the level is the
     * mean of their absolute values (integer division).
     *
     * @param frame the frame to measure
     * @return the mean absolute sample value, or 0 for a frame holding no
     *         complete 16-bit sample
     */
    static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
    {
        apr_size_t sum = 0;
        /* number of 16-bit samples: the frame size is halved */
        apr_size_t count = frame->codec_frame.size/2;
        const apr_int16_t *cur;
        const apr_int16_t *end;

        /* nothing to average; also guards the division below against count == 0 */
        if(count == 0) {
            return 0;
        }

        cur = frame->codec_frame.buffer;
        end = cur + count;
        for(; cur < end; cur++) {
            /* accumulate the magnitude of each sample */
            if(*cur < 0) {
                sum -= *cur;
            }
            else {
                sum += *cur;
            }
        }

        /* plain average over the frame */
        return sum / count;
    }

    最后看一下,状态切换的过程,下面mpf_activity_detector_process函数,主要是通过计算frame的平均值,来完成状态切换的逻辑:

    处理过程共有四个状态:

          ACTIVITY状态

          INACTIVITY状态

         TRANS_ACTIVITY状态

         TRANS_INACTIVITY状态

         其中TRANS状态是中间状态:在切换为ACTIVITY状态或INACTIVITY状态之前,需要先进入对应的TRANS状态累计时长,只有累计时长达到设定的超时值才会真正切换,否则退回原来的状态。

    /**
     * Process current frame.
     *
     * Computes the level of an audio frame (non-audio frames are treated as
     * level 0), compares it with the detector's threshold and advances the
     * four-state machine (inactivity, activity transition, activity,
     * inactivity transition).
     *
     * State switches go through mpf_activity_detector_state_change(), which is
     * defined outside this block; it presumably also resets the accumulated
     * duration -- verify against its definition.
     *
     * @param detector the detector to update
     * @param frame the frame to process
     * @return MPF_DETECTOR_EVENT_ACTIVITY, MPF_DETECTOR_EVENT_INACTIVITY or
     *         MPF_DETECTOR_EVENT_NOINPUT when the corresponding timeout is
     *         reached on this frame, otherwise MPF_DETECTOR_EVENT_NONE
     */
    MPF_DECLARE(mpf_detector_event_e) mpf_activity_detector_process(mpf_activity_detector_t *detector, const mpf_frame_t *frame)
    {
        mpf_detector_event_e det_event = MPF_DETECTOR_EVENT_NONE;
        apr_size_t level = 0;
        if((frame->type & MEDIA_FRAME_TYPE_AUDIO) == MEDIA_FRAME_TYPE_AUDIO) {
            /* first, calculate current activity level of processed frame */
            level = mpf_activity_detector_level_calculate(frame);
    #if 0
            apt_log(APT_LOG_MARK,APT_PRIO_INFO,"Activity Detector --------------------- [%"APR_SIZE_T_FMT"]",level);
    #endif
        }

        if(detector->state == DETECTOR_STATE_INACTIVITY) {
            if(level >= detector->level_threshold) {
                /* start to detect activity: enter the transition state only,
                 * the detector is not considered active yet */
                mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY_TRANSITION);
            }
            else {
                detector->duration += CODEC_FRAME_TIME_BASE;
                if(detector->duration >= detector->noinput_timeout) {
                    /* detected noinput */
                    /* NOTE(review): duration is not reset in this branch, so the
                     * event keeps being returned for following quiet frames
                     * unless the caller intervenes -- confirm this is intended */
                    det_event = MPF_DETECTOR_EVENT_NOINPUT;
                }
            }
        }
        else if(detector->state == DETECTOR_STATE_ACTIVITY_TRANSITION) {
            if(level >= detector->level_threshold) {
                /* still above the threshold: keep accumulating */
                detector->duration += CODEC_FRAME_TIME_BASE;
                if(detector->duration >= detector->speech_timeout) {
                    /* finally detected activity */
                    det_event = MPF_DETECTOR_EVENT_ACTIVITY;
                    mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);
                }
            }
            else {
                /* fallback to inactivity */
                mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);
            }
        }
        else if(detector->state == DETECTOR_STATE_ACTIVITY) {
            if(level >= detector->level_threshold) {
                /* still active: just track how long */
                detector->duration += CODEC_FRAME_TIME_BASE;
            }
            else {
                /* start to detect inactivity */
                mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY_TRANSITION);
            }
        }
        else if(detector->state == DETECTOR_STATE_INACTIVITY_TRANSITION) {
            if(level >= detector->level_threshold) {
                /* fallback to activity */
                mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);
            }
            else {
                /* still below the threshold: keep accumulating */
                detector->duration += CODEC_FRAME_TIME_BASE;
                if(detector->duration >= detector->silence_timeout) {
                    /* detected inactivity */
                    det_event = MPF_DETECTOR_EVENT_INACTIVITY;
                    mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);
                }
            }
        }

        return det_event;
    }

     

  • 相关阅读:
    PyCharm设置改变字体大小的快捷键
    python中的字符串
    python入门知识
    css3(border-radius)边框圆角详解
    js中__proto__和prototype的区别和关系?
    常见的浏览器兼容问题
    Meta http-equiv属性详解(转)
    WinForm界面设计-Button添加背景图去边框
    vs2015 c# winfrom应用程序打包成64位
    vsto-Word相关操作
  • 原文地址:https://www.cnblogs.com/damizhou/p/11308236.html
Copyright © 2011-2022 走看看